On Tue, 31 May 2022, Geliang Tang wrote:
> Please update the commit log:
>
> '''
> This patch defines two new wrappers, mptcp_sched_get_send() and
> mptcp_sched_get_retrans(), which invoke get_subflow() of msk->sched.
> Use them instead of calling mptcp_subflow_get_send() or
> mptcp_subflow_get_retrans() directly.
>
> Set the subflow pointers array in struct mptcp_sched_data before invoking
> get_subflow(), then it can be used in get_subflow() in the BPF contexts.
>
> Check the subflow scheduled flags to determine which subflow or
> subflows were picked by the scheduler.
> '''
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/sched.c | 54 +++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 43 insertions(+), 11 deletions(-)
>
> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> index 3ceb721e6489..613b7005938c 100644
> --- a/net/mptcp/sched.c
> +++ b/net/mptcp/sched.c
> @@ -88,11 +88,25 @@ void mptcp_release_sched(struct mptcp_sock *msk)
> bpf_module_put(sched, sched->owner);
> }
>
> -static int mptcp_sched_data_init(struct mptcp_sock *msk,
> +static int mptcp_sched_data_init(struct mptcp_sock *msk, bool reinject,
> struct mptcp_sched_data *data)
> {
> - data->sock = NULL;
> - data->call_again = 0;
> + struct mptcp_subflow_context *subflow;
> + int i = 0;
> +
> + data->reinject = reinject;
> +
> + mptcp_for_each_subflow(msk, subflow) {
> + if (i == MPTCP_SUBFLOWS_MAX) {
> + pr_warn_once("too many subflows");
> + break;
> + }
> + WRITE_ONCE(subflow->scheduled, false);
If subflow->scheduled is using READ_ONCE/WRITE_ONCE semantics, then
writing it directly from BPF code is going to be a problem. The code in
this patch set would work ok since all read and write access is under the
msk lock, but I think integrating with all of the transmit and retransmit
code (especially transmission in mptcp_subflow_process_delegated()) would
make it important to use WRITE_ONCE() to set subflow->scheduled.
I think that requires using a C helper function called from BPF to do
WRITE_ONCE(subflow->scheduled), or using a lock to order accesses. The
mptcp_data_lock is already used in mptcp_subflow_process_delegated() but
we probably don't want to add more locking to mptcp_sendmsg(). That makes
me think the helper function might be better - unless there's a generic
BPF technique for using WRITE_ONCE.
> + data->contexts[i++] = subflow;
> + }
> +
> + for (; i < MPTCP_SUBFLOWS_MAX; i++)
> + data->contexts[i] = NULL;
>
> return 0;
> }
> @@ -100,6 +114,8 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
> struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> {
> struct mptcp_sched_data data;
> + struct sock *ssk = NULL;
> + int i;
>
> sock_owned_by_me((struct sock *)msk);
>
> @@ -113,16 +129,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> if (!msk->sched)
> return mptcp_subflow_get_send(msk);
>
> - mptcp_sched_data_init(msk, &data);
> - msk->sched->get_subflow(msk, false, &data);
> + mptcp_sched_data_init(msk, false, &data);
> + msk->sched->get_subflow(msk, &data);
> +
> + for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> + if (data.contexts[i] && READ_ONCE(data.contexts[i]->scheduled)) {
> + ssk = data.contexts[i]->tcp_sock;
> + msk->last_snd = ssk;
> + break;
> + }
> + }
I think this is ok for a placeholder until more of the transmit
integration is done.
>
> - msk->last_snd = data.sock;
> - return data.sock;
> + return ssk;
> }
>
> struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> {
> struct mptcp_sched_data data;
> + struct sock *ssk = NULL;
> + int i;
>
> sock_owned_by_me((const struct sock *)msk);
>
> @@ -133,9 +158,16 @@ struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> if (!msk->sched)
> return mptcp_subflow_get_retrans(msk);
>
> - mptcp_sched_data_init(msk, &data);
> - msk->sched->get_subflow(msk, true, &data);
> + mptcp_sched_data_init(msk, true, &data);
> + msk->sched->get_subflow(msk, &data);
> +
> + for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> + if (data.contexts[i] && READ_ONCE(data.contexts[i]->scheduled)) {
> + ssk = data.contexts[i]->tcp_sock;
> + msk->last_snd = ssk;
> + break;
> + }
> + }
>
> - msk->last_snd = data.sock;
> - return data.sock;
> + return ssk;
> }
> --
> 2.34.1
>
>
>
--
Mat Martineau
Intel