mptcp: add __mptcp_sched_get_send wrapper
This patch defines the wrapper __mptcp_sched_get_send(), which invokes
the get_subflow() hook of msk->sched. Use this wrapper in
__mptcp_push_pending() instead of calling mptcp_subflow_get_send()
directly.
Set the subflow pointers array in struct mptcp_sched_data before invoking
get_subflow(), so that it can be used by get_subflow() in BPF contexts.
Check the subflows' scheduled flags in __mptcp_push_pending() to determine
which subflow or subflows were picked by the scheduler, and use them to
send data.
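For illustration only, a get_subflow() implementation could look like the
sketch below. This is a hypothetical scheduler, not part of this patch; it
only assumes the struct mptcp_sched_data layout and the
mptcp_subflow_set_scheduled() helper introduced earlier in this series:

	/* Hypothetical example: always pick the first populated context.
	 * __mptcp_push_pending() will test and clear the scheduled flag
	 * once it has sent data on the chosen subflow.
	 */
	static int mptcp_sched_first_get_subflow(struct mptcp_sock *msk,
						 struct mptcp_sched_data *data)
	{
		int i;

		for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
			if (data->contexts[i]) {
				mptcp_subflow_set_scheduled(data->contexts[i], true);
				return 0;
			}
		}
		return -1;
	}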
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/protocol.c | 76 ++++++++++++++++++++++++--------------------
net/mptcp/protocol.h | 4 +--
net/mptcp/sched.c | 59 ++++++++--------------------------
 3 files changed, 56 insertions(+), 83 deletions(-)
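Notes (for reviewers, not part of the commit message): the scheduler types
used below were introduced by earlier patches in this series. Reconstructed
from their uses in this patch, they look roughly like:

	struct mptcp_sched_data {
		bool	reinject;
		struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
	};

	struct mptcp_sched_ops {
		int (*get_subflow)(struct mptcp_sock *msk,
				   struct mptcp_sched_data *data);
		/* name, owner, init/release hooks, etc. elided */
	};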
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d6aef4b13b8a..c7a1e15a66fe 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1551,58 +1551,59 @@ void mptcp_check_and_set_pending(struct sock *sk)
void __mptcp_push_pending(struct sock *sk, unsigned int flags)
{
- struct sock *prev_ssk = NULL, *ssk = NULL;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_subflow_context *subflow;
struct mptcp_sendmsg_info info = {
- .flags = flags,
+ .flags = flags,
};
struct mptcp_data_frag *dfrag;
- int len, copied = 0;
+ int len, copied = 0, err;
+ struct sock *ssk = NULL;
while ((dfrag = mptcp_send_head(sk))) {
info.sent = dfrag->already_sent;
info.limit = dfrag->data_len;
len = dfrag->data_len - dfrag->already_sent;
while (len > 0) {
- int ret = 0;
+ int ret = 0, max = 0;
- prev_ssk = ssk;
- ssk = mptcp_sched_get_send(msk);
-
- /* First check. If the ssk has changed since
- * the last round, release prev_ssk
- */
- if (ssk != prev_ssk && prev_ssk)
- mptcp_push_release(prev_ssk, &info);
- if (!ssk)
+ err = __mptcp_sched_get_send(msk);
+ if (err)
goto out;
- /* Need to lock the new subflow only if different
- * from the previous one, otherwise we are still
- * helding the relevant lock
- */
- if (ssk != prev_ssk)
- lock_sock(ssk);
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled)) {
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ if (!ssk)
+ goto out;
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
- if (ret <= 0) {
- mptcp_push_release(ssk, &info);
- goto out;
+ lock_sock(ssk);
+
+ ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+ if (ret <= 0) {
+ mptcp_push_release(ssk, &info);
+ goto out;
+ }
+
+ if (ret > max)
+ max = ret;
+
+ mptcp_push_release(ssk, &info);
+
+ msk->last_snd = ssk;
+ mptcp_subflow_set_scheduled(subflow, false);
+ }
}
- info.sent += ret;
- copied += ret;
- len -= ret;
+ info.sent += max;
+ copied += max;
+ len -= max;
- mptcp_update_post_push(msk, dfrag, ret);
+ mptcp_update_post_push(msk, dfrag, max);
}
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
}
- /* at this point we held the socket lock for the last subflow we used */
- if (ssk)
- mptcp_push_release(ssk, &info);
-
out:
/* ensure the rtx timer is running */
if (!mptcp_timer_pending(sk))
@@ -1634,7 +1635,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
* check for a different subflow usage only after
* spooling the first chunk of data
*/
- xmit_ssk = first ? ssk : mptcp_sched_get_send(mptcp_sk(sk));
+ xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
if (!xmit_ssk)
goto out;
if (xmit_ssk != ssk) {
@@ -2195,12 +2196,17 @@ static void mptcp_timeout_timer(struct timer_list *t)
*
* A backup subflow is returned only if that is the only kind available.
*/
-struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
+static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
{
struct sock *backup = NULL, *pick = NULL;
struct mptcp_subflow_context *subflow;
int min_stale_count = INT_MAX;
+ sock_owned_by_me((const struct sock *)msk);
+
+ if (__mptcp_check_fallback(msk))
+ return NULL;
+
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -2453,7 +2459,7 @@ static void __mptcp_retrans(struct sock *sk)
mptcp_clean_una_wakeup(sk);
/* first check ssk: need to kick "stale" logic */
- ssk = mptcp_sched_get_retrans(msk);
+ ssk = mptcp_subflow_get_retrans(msk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag) {
if (mptcp_data_fin_enabled(msk)) {
@@ -3107,7 +3113,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
return;
if (!sock_owned_by_user(sk)) {
- struct sock *xmit_ssk = mptcp_sched_get_send(mptcp_sk(sk));
+ struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
if (xmit_ssk == ssk)
__mptcp_subflow_push_pending(sk, ssk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d406b5afbee4..a12507dcf1d5 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -632,9 +632,7 @@ void mptcp_release_sched(struct mptcp_sock *msk);
void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
bool scheduled);
struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
-struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
-struct sock *mptcp_sched_get_send(struct mptcp_sock *msk);
-struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk);
+int __mptcp_sched_get_send(struct mptcp_sock *msk);
static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 8858e1fc8b74..9427d15b4c69 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -117,63 +117,32 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk, bool reinject,
return 0;
}
-struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
+int __mptcp_sched_get_send(struct mptcp_sock *msk)
{
struct mptcp_sched_data data;
struct sock *ssk = NULL;
- int i;
sock_owned_by_me((struct sock *)msk);
/* the following check is moved out of mptcp_subflow_get_send */
if (__mptcp_check_fallback(msk)) {
- if (!msk->first)
- return NULL;
- return sk_stream_memory_free(msk->first) ? msk->first : NULL;
- }
-
- if (!msk->sched)
- return mptcp_subflow_get_send(msk);
-
- mptcp_sched_data_init(msk, false, &data);
- msk->sched->get_subflow(msk, &data);
-
- for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
- if (data.contexts[i] && READ_ONCE(data.contexts[i]->scheduled)) {
- ssk = data.contexts[i]->tcp_sock;
- msk->last_snd = ssk;
- break;
+ if (msk->first && sk_stream_memory_free(msk->first)) {
+ mptcp_subflow_set_scheduled(mptcp_subflow_ctx(msk->first), true);
+ return 0;
}
+ return -EINVAL;
}
- return ssk;
-}
-
-struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
-{
- struct mptcp_sched_data data;
- struct sock *ssk = NULL;
- int i;
-
- sock_owned_by_me((const struct sock *)msk);
-
- /* the following check is moved out of mptcp_subflow_get_retrans */
- if (__mptcp_check_fallback(msk))
- return NULL;
-
- if (!msk->sched)
- return mptcp_subflow_get_retrans(msk);
+ if (!msk->sched) {
+ ssk = mptcp_subflow_get_send(msk);
+ if (!ssk)
+ return -EINVAL;
+ mptcp_subflow_set_scheduled(mptcp_subflow_ctx(ssk), true);
+ return 0;
+ }
- mptcp_sched_data_init(msk, true, &data);
+ mptcp_sched_data_init(msk, false, &data);
msk->sched->get_subflow(msk, &data);
- for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
- if (data.contexts[i] && READ_ONCE(data.contexts[i]->scheduled)) {
- ssk = data.contexts[i]->tcp_sock;
- msk->last_snd = ssk;
- break;
- }
- }
-
- return ssk;
+ return 0;
}
--
2.34.1