net/mptcp/protocol.c | 46 +++++++++++++++++++++++--------------------- net/mptcp/protocol.h | 18 ++++++++++------- net/mptcp/subflow.c | 4 ++-- 3 files changed, 37 insertions(+), 31 deletions(-)
Currently the msk->flags bitmask carries both state for the
mptcp_release_cb() - mostly touched under the mptcp data lock
- and other state info touched even outside such lock scope.
As a consequence, msk->flags is always manipulated with
atomic operations.
This change splits such bitmask into two separate fields, so
that we can use plain bit operations when touching the
cb-related info.
The MPTCP_PUSH_PENDING bit needs additional care, as it is the
only CB-related field currently accessed under either the mptcp
data lock or the mptcp socket lock.
Let's add another mask just for that bit's sake.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v5 -> v6:
- use BIT() where appropriate
---
net/mptcp/protocol.c | 46 +++++++++++++++++++++++---------------------
net/mptcp/protocol.h | 18 ++++++++++-------
net/mptcp/subflow.c | 4 ++--
3 files changed, 37 insertions(+), 31 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e81fd46a43c4..5a735c37cb8e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -763,7 +763,7 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
if (!sock_owned_by_user(sk))
__mptcp_error_report(sk);
else
- set_bit(MPTCP_ERROR_REPORT, &msk->flags);
+ __set_bit(MPTCP_ERROR_REPORT, &msk->cb_flags);
}
/* If the moves have caught up with the DATA_FIN sequence number
@@ -1529,9 +1529,8 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
void mptcp_check_and_set_pending(struct sock *sk)
{
- if (mptcp_send_head(sk) &&
- !test_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
- set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+ if (mptcp_send_head(sk))
+ mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
}
void __mptcp_push_pending(struct sock *sk, unsigned int flags)
@@ -2146,7 +2145,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
mptcp_schedule_work(sk);
} else {
/* delegate our work to tcp_release_cb() */
- set_bit(MPTCP_RETRANSMIT, &msk->flags);
+ __set_bit(MPTCP_RETRANSMIT, &msk->cb_flags);
}
bh_unlock_sock(sk);
sock_put(sk);
@@ -2858,7 +2857,9 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_destroy_common(msk);
msk->last_snd = NULL;
- msk->flags = 0;
+ WRITE_ONCE(msk->flags, 0);
+ msk->cb_flags = 0;
+ msk->push_pending = 0;
msk->recovery = false;
msk->can_ack = false;
msk->fully_established = false;
@@ -3041,7 +3042,7 @@ void __mptcp_data_acked(struct sock *sk)
if (!sock_owned_by_user(sk))
__mptcp_clean_una(sk);
else
- set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags);
if (mptcp_pending_data_fin_ack(sk))
mptcp_schedule_work(sk);
@@ -3060,22 +3061,22 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
else if (xmit_ssk)
mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
} else {
- set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
}
}
+#define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \
+ BIT(MPTCP_RETRANSMIT) | \
+ BIT(MPTCP_FLUSH_JOIN_LIST))
+
/* processes deferred events and flush wmem */
static void mptcp_release_cb(struct sock *sk)
{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
for (;;) {
- unsigned long flags = 0;
-
- if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
- flags |= BIT(MPTCP_PUSH_PENDING);
- if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags))
- flags |= BIT(MPTCP_RETRANSMIT);
- if (test_and_clear_bit(MPTCP_FLUSH_JOIN_LIST, &mptcp_sk(sk)->flags))
- flags |= BIT(MPTCP_FLUSH_JOIN_LIST);
+ unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
+ msk->push_pending;
if (!flags)
break;
@@ -3086,7 +3087,8 @@ static void mptcp_release_cb(struct sock *sk)
* datapath acquires the msk socket spinlock while helding
* the subflow socket lock
*/
-
+ msk->push_pending = 0;
+ msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
__mptcp_flush_join_list(sk);
@@ -3102,11 +3104,11 @@ static void mptcp_release_cb(struct sock *sk)
/* be sure to set the current sk state before tacking actions
* depending on sk_state
*/
- if (test_and_clear_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags))
+ if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
__mptcp_set_connected(sk);
- if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
+ if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
- if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
+ if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
__mptcp_error_report(sk);
__mptcp_update_rmem(sk);
@@ -3148,7 +3150,7 @@ void mptcp_subflow_process_delegated(struct sock *ssk)
if (!sock_owned_by_user(sk))
__mptcp_subflow_push_pending(sk, ssk);
else
- set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
}
@@ -3268,7 +3270,7 @@ bool mptcp_finish_join(struct sock *ssk)
} else {
sock_hold(ssk);
list_add_tail(&subflow->node, &msk->join_list);
- set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->flags);
+ __set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->cb_flags);
}
mptcp_data_unlock(parent);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 27a510b20996..0459f164dc0b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -110,18 +110,20 @@
/* MPTCP TCPRST flags */
#define MPTCP_RST_TRANSIENT BIT(0)
-/* MPTCP socket flags */
+/* MPTCP socket atomic flags */
#define MPTCP_NOSPACE 1
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
#define MPTCP_FALLBACK_DONE 4
#define MPTCP_WORK_CLOSE_SUBFLOW 5
-#define MPTCP_PUSH_PENDING 6
-#define MPTCP_CLEAN_UNA 7
-#define MPTCP_ERROR_REPORT 8
-#define MPTCP_RETRANSMIT 9
-#define MPTCP_FLUSH_JOIN_LIST 10
-#define MPTCP_CONNECTED 11
+
+/* MPTCP socket release cb flags */
+#define MPTCP_PUSH_PENDING 1
+#define MPTCP_CLEAN_UNA 2
+#define MPTCP_ERROR_REPORT 3
+#define MPTCP_RETRANSMIT 4
+#define MPTCP_FLUSH_JOIN_LIST 5
+#define MPTCP_CONNECTED 6
static inline bool before64(__u64 seq1, __u64 seq2)
{
@@ -249,6 +251,8 @@ struct mptcp_sock {
u32 token;
int rmem_released;
unsigned long flags;
+ unsigned long cb_flags;
+ unsigned long push_pending;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 8716b9cb8040..557ef71309b0 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -388,7 +388,7 @@ static void mptcp_set_connected(struct sock *sk)
if (!sock_owned_by_user(sk))
__mptcp_set_connected(sk);
else
- set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
}
@@ -1279,7 +1279,7 @@ static void subflow_error_report(struct sock *ssk)
if (!sock_owned_by_user(sk))
__mptcp_error_report(sk);
else
- set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
}
--
2.33.1
On Wed, 15 Dec 2021, Paolo Abeni wrote: > Currently the msk->flags bitmask carries both state for the > mptcp_release_cb() - mostly touched under the mptcp data lock > - and others state info touched even outside such lock scope. > > As a consequence, msk->flags is always manipulated with > atomic operations. > > This change splits such bitmask in two separate fields, so > that we use plain bit oper operations when touching the > cb-related info. > > The MPTCP_PUSH_PENDING bit needs additional care, as it is the > only CB related field currently accessed either under the mptcp > data lock or the mptcp socket lock. > Let's add another mask just for such bit's sake. > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > --- > v5 -> v6: > - use BIT() where appropriate Thanks for noticing that. I've double checked all the affected flag names and it looks good to me now. Tests are running ok for me (just the intermittent ipv6 remove subflow failure that's unrelated to this commit). Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com> Mat > --- > net/mptcp/protocol.c | 46 +++++++++++++++++++++++--------------------- > net/mptcp/protocol.h | 18 ++++++++++------- > net/mptcp/subflow.c | 4 ++-- > 3 files changed, 37 insertions(+), 31 deletions(-) > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > index e81fd46a43c4..5a735c37cb8e 100644 > --- a/net/mptcp/protocol.c > +++ b/net/mptcp/protocol.c > @@ -763,7 +763,7 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) > if (!sock_owned_by_user(sk)) > __mptcp_error_report(sk); > else > - set_bit(MPTCP_ERROR_REPORT, &msk->flags); > + __set_bit(MPTCP_ERROR_REPORT, &msk->cb_flags); > } > > /* If the moves have caught up with the DATA_FIN sequence number > @@ -1529,9 +1529,8 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, > > void mptcp_check_and_set_pending(struct sock *sk) > { > - if (mptcp_send_head(sk) && > - !test_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) > - 
set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); > + if (mptcp_send_head(sk)) > + mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING); > } > > void __mptcp_push_pending(struct sock *sk, unsigned int flags) > @@ -2146,7 +2145,7 @@ static void mptcp_retransmit_timer(struct timer_list *t) > mptcp_schedule_work(sk); > } else { > /* delegate our work to tcp_release_cb() */ > - set_bit(MPTCP_RETRANSMIT, &msk->flags); > + __set_bit(MPTCP_RETRANSMIT, &msk->cb_flags); > } > bh_unlock_sock(sk); > sock_put(sk); > @@ -2858,7 +2857,9 @@ static int mptcp_disconnect(struct sock *sk, int flags) > > mptcp_destroy_common(msk); > msk->last_snd = NULL; > - msk->flags = 0; > + WRITE_ONCE(msk->flags, 0); > + msk->cb_flags = 0; > + msk->push_pending = 0; > msk->recovery = false; > msk->can_ack = false; > msk->fully_established = false; > @@ -3041,7 +3042,7 @@ void __mptcp_data_acked(struct sock *sk) > if (!sock_owned_by_user(sk)) > __mptcp_clean_una(sk); > else > - set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags); > + __set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags); > > if (mptcp_pending_data_fin_ack(sk)) > mptcp_schedule_work(sk); > @@ -3060,22 +3061,22 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) > else if (xmit_ssk) > mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND); > } else { > - set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); > + __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags); > } > } > > +#define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \ > + BIT(MPTCP_RETRANSMIT) | \ > + BIT(MPTCP_FLUSH_JOIN_LIST)) > + > /* processes deferred events and flush wmem */ > static void mptcp_release_cb(struct sock *sk) > { > + struct mptcp_sock *msk = mptcp_sk(sk); > + > for (;;) { > - unsigned long flags = 0; > - > - if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) > - flags |= BIT(MPTCP_PUSH_PENDING); > - if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags)) > - flags |= BIT(MPTCP_RETRANSMIT); > - if 
(test_and_clear_bit(MPTCP_FLUSH_JOIN_LIST, &mptcp_sk(sk)->flags)) > - flags |= BIT(MPTCP_FLUSH_JOIN_LIST); > + unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) | > + msk->push_pending; > if (!flags) > break; > > @@ -3086,7 +3087,8 @@ static void mptcp_release_cb(struct sock *sk) > * datapath acquires the msk socket spinlock while helding > * the subflow socket lock > */ > - > + msk->push_pending = 0; > + msk->cb_flags &= ~flags; > spin_unlock_bh(&sk->sk_lock.slock); > if (flags & BIT(MPTCP_FLUSH_JOIN_LIST)) > __mptcp_flush_join_list(sk); > @@ -3102,11 +3104,11 @@ static void mptcp_release_cb(struct sock *sk) > /* be sure to set the current sk state before tacking actions > * depending on sk_state > */ > - if (test_and_clear_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags)) > + if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags)) > __mptcp_set_connected(sk); > - if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags)) > + if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) > __mptcp_clean_una_wakeup(sk); > - if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags)) > + if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) > __mptcp_error_report(sk); > > __mptcp_update_rmem(sk); > @@ -3148,7 +3150,7 @@ void mptcp_subflow_process_delegated(struct sock *ssk) > if (!sock_owned_by_user(sk)) > __mptcp_subflow_push_pending(sk, ssk); > else > - set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); > + __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags); > mptcp_data_unlock(sk); > mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND); > } > @@ -3268,7 +3270,7 @@ bool mptcp_finish_join(struct sock *ssk) > } else { > sock_hold(ssk); > list_add_tail(&subflow->node, &msk->join_list); > - set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->flags); > + __set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->cb_flags); > } > mptcp_data_unlock(parent); > > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h > index 27a510b20996..0459f164dc0b 100644 > --- 
a/net/mptcp/protocol.h > +++ b/net/mptcp/protocol.h > @@ -110,18 +110,20 @@ > /* MPTCP TCPRST flags */ > #define MPTCP_RST_TRANSIENT BIT(0) > > -/* MPTCP socket flags */ > +/* MPTCP socket atomic flags */ > #define MPTCP_NOSPACE 1 > #define MPTCP_WORK_RTX 2 > #define MPTCP_WORK_EOF 3 > #define MPTCP_FALLBACK_DONE 4 > #define MPTCP_WORK_CLOSE_SUBFLOW 5 > -#define MPTCP_PUSH_PENDING 6 > -#define MPTCP_CLEAN_UNA 7 > -#define MPTCP_ERROR_REPORT 8 > -#define MPTCP_RETRANSMIT 9 > -#define MPTCP_FLUSH_JOIN_LIST 10 > -#define MPTCP_CONNECTED 11 > + > +/* MPTCP socket release cb flags */ > +#define MPTCP_PUSH_PENDING 1 > +#define MPTCP_CLEAN_UNA 2 > +#define MPTCP_ERROR_REPORT 3 > +#define MPTCP_RETRANSMIT 4 > +#define MPTCP_FLUSH_JOIN_LIST 5 > +#define MPTCP_CONNECTED 6 > > static inline bool before64(__u64 seq1, __u64 seq2) > { > @@ -249,6 +251,8 @@ struct mptcp_sock { > u32 token; > int rmem_released; > unsigned long flags; > + unsigned long cb_flags; > + unsigned long push_pending; > bool recovery; /* closing subflow write queue reinjected */ > bool can_ack; > bool fully_established; > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c > index 8716b9cb8040..557ef71309b0 100644 > --- a/net/mptcp/subflow.c > +++ b/net/mptcp/subflow.c > @@ -388,7 +388,7 @@ static void mptcp_set_connected(struct sock *sk) > if (!sock_owned_by_user(sk)) > __mptcp_set_connected(sk); > else > - set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags); > + __set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags); > mptcp_data_unlock(sk); > } > > @@ -1279,7 +1279,7 @@ static void subflow_error_report(struct sock *ssk) > if (!sock_owned_by_user(sk)) > __mptcp_error_report(sk); > else > - set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags); > + __set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->cb_flags); > mptcp_data_unlock(sk); > } > > -- > 2.33.1 > > > -- Mat Martineau Intel
Hi Paolo, Mat, On 15/12/2021 16:53, Paolo Abeni wrote: > Currently the msk->flags bitmask carries both state for the > mptcp_release_cb() - mostly touched under the mptcp data lock > - and others state info touched even outside such lock scope. > > As a consequence, msk->flags is always manipulated with > atomic operations. > > This change splits such bitmask in two separate fields, so > that we use plain bit oper operations when touching the > cb-related info. > > The MPTCP_PUSH_PENDING bit needs additional care, as it is the > only CB related field currently accessed either under the mptcp > data lock or the mptcp socket lock. > Let's add another mask just for such bit's sake. > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> Thank you for the new version and the review! I just applied this patch at the end with Mat's RvB tag and __must_hold() as discussed on IRC: - bcccc445fb2e: mptcp: avoid atomic bit manipulation when possible - Results: c85bd1253bfa..1fdc9db07613 Builds and tests are now in progress: https://cirrus-ci.com/github/multipath-tcp/mptcp_net-next/export/20211216T151318 https://github.com/multipath-tcp/mptcp_net-next/actions/workflows/build-validation.yml?query=branch:export Cheers, Matt -- Tessares | Belgium | Hybrid Access Solutions www.tessares.net
© 2016 - 2024 Red Hat, Inc.