[PATCH mptcp-next 1/4] bpf: Add mptcp_subflow bpf_iter

Geliang Tang posted 4 patches 3 months ago
There is a newer version of this series
[PATCH mptcp-next 1/4] bpf: Add mptcp_subflow bpf_iter
Posted by Geliang Tang 3 months ago
From: Geliang Tang <tanggeliang@kylinos.cn>

It's necessary to traverse all subflows on the conn_list of an MPTCP
socket and then call a kfunc to modify the fields of each subflow. In
kernel space, the mptcp_for_each_subflow() helper is used for this:

 mptcp_for_each_subflow(msk, subflow)
         kfunc(subflow);

But in the MPTCP BPF program, this has not yet been implemented. As
Martin suggested recently, this conn_list walking + modify-by-kfunc
usage fits the bpf_iter use case.

This patch adds a new bpf_iter type named "mptcp_subflow" to do this.

Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 kernel/bpf/helpers.c |  3 +++
 net/mptcp/bpf.c      | 57 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index b5f0adae8293..2340ba967444 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -3023,6 +3023,9 @@ BTF_ID_FLAGS(func, bpf_preempt_enable)
 BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
 BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new)
+BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next)
+BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy)
 BTF_KFUNCS_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 9672a70c24b0..cda09bbfd617 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -204,6 +204,63 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
 	.set   = &bpf_mptcp_fmodret_ids,
 };
 
+struct bpf_iter__mptcp_subflow {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct mptcp_sock *, msk);
+	__bpf_md_ptr(struct list_head *, pos);
+};
+
+DEFINE_BPF_ITER_FUNC(mptcp_subflow, struct bpf_iter_meta *meta,
+		     struct mptcp_sock *msk, struct list_head *pos)
+
+struct bpf_iter_mptcp_subflow {
+	__u64 __opaque[3];
+} __attribute__((aligned(8)));
+
+struct bpf_iter_mptcp_subflow_kern {
+	struct mptcp_sock *msk;
+	struct list_head *pos;
+} __attribute__((aligned(8)));
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it,
+					   struct mptcp_sock *msk)
+{
+	struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
+
+	kit->msk = msk;
+	kit->pos = &msk->conn_list;
+	spin_lock_bh(&msk->pm.lock);
+
+	return 0;
+}
+
+__bpf_kfunc struct mptcp_subflow_context *
+bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it)
+{
+	struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_sock *msk = kit->msk;
+
+	subflow = list_entry((kit->pos)->next, struct mptcp_subflow_context, node);
+	if (list_entry_is_head(subflow, &msk->conn_list, node))
+		return NULL;
+
+	kit->pos = &subflow->node;
+	return subflow;
+}
+
+__bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it)
+{
+	struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
+	struct mptcp_sock *msk = kit->msk;
+
+	spin_unlock_bh(&msk->pm.lock);
+}
+
+__bpf_kfunc_end_defs();
+
 __diag_push();
 __diag_ignore_all("-Wmissing-prototypes",
 		  "kfuncs which will be used in BPF programs");
-- 
2.43.0
Re: [PATCH mptcp-next 1/4] bpf: Add mptcp_subflow bpf_iter
Posted by Martin KaFai Lau 3 months ago
On 9/5/24 6:52 AM, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
> 
> It's necessary to traverse all subflows on the conn_list of an MPTCP
> socket and then call kfunc to modify the fields of each subflow. In
> kernel space, mptcp_for_each_subflow() helper is used for this:
> 
>   mptcp_for_each_subflow(msk, subflow)
>           kfunc(subflow);
> 
> But in the MPTCP BPF program, this has not yet been implemented. As
> Martin suggested recently, this conn_list walking + modify-by-kfunc
> usage fits the bpf_iter use case.
> 
> This patch adds a new bpf_iter type named "mptcp_subflow" to do this.
> 
> Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
>   kernel/bpf/helpers.c |  3 +++
>   net/mptcp/bpf.c      | 57 ++++++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 60 insertions(+)
> 
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index b5f0adae8293..2340ba967444 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -3023,6 +3023,9 @@ BTF_ID_FLAGS(func, bpf_preempt_enable)
>   BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
>   BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
>   BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
> +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new)
> +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next)
> +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy)
>   BTF_KFUNCS_END(common_btf_ids)
>   
>   static const struct btf_kfunc_id_set common_kfunc_set = {
> diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> index 9672a70c24b0..cda09bbfd617 100644
> --- a/net/mptcp/bpf.c
> +++ b/net/mptcp/bpf.c
> @@ -204,6 +204,63 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
>   	.set   = &bpf_mptcp_fmodret_ids,
>   };
>   
> +struct bpf_iter__mptcp_subflow {
> +	__bpf_md_ptr(struct bpf_iter_meta *, meta);
> +	__bpf_md_ptr(struct mptcp_sock *, msk);
> +	__bpf_md_ptr(struct list_head *, pos);
> +};
> +
> +DEFINE_BPF_ITER_FUNC(mptcp_subflow, struct bpf_iter_meta *meta,
> +		     struct mptcp_sock *msk, struct list_head *pos)
> +
> +struct bpf_iter_mptcp_subflow {
> +	__u64 __opaque[3];
> +} __attribute__((aligned(8)));
> +
> +struct bpf_iter_mptcp_subflow_kern {
> +	struct mptcp_sock *msk;
> +	struct list_head *pos;
> +} __attribute__((aligned(8)));
> +
> +__bpf_kfunc_start_defs();
> +
> +__bpf_kfunc int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it,
> +					   struct mptcp_sock *msk)
> +{
> +	struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> +
> +	kit->msk = msk;
> +	kit->pos = &msk->conn_list;
> +	spin_lock_bh(&msk->pm.lock);

I don't think a spin_lock here without an unlock can be used. E.g., what if
bpf_iter_mptcp_subflow_new() is called twice back-to-back?

I haven't looked at the mptcp details, so some questions:
Is the list protected by msk->pm.lock?
What happens to the sk_lock of the msk?
Can this be RCU-ified, or does it need some care when walking the established
TCP subflow?


[ Please cc the bpf list. Helping to review patches is a good way to contribute 
back to the mailing list. ]

> +
> +	return 0;
> +}
> +
> +__bpf_kfunc struct mptcp_subflow_context *
> +bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it)
> +{
> +	struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> +	struct mptcp_subflow_context *subflow;
> +	struct mptcp_sock *msk = kit->msk;
> +
> +	subflow = list_entry((kit->pos)->next, struct mptcp_subflow_context, node);
> +	if (list_entry_is_head(subflow, &msk->conn_list, node))
> +		return NULL;
> +
> +	kit->pos = &subflow->node;
> +	return subflow;
> +}
> +
> +__bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it)
> +{
> +	struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> +	struct mptcp_sock *msk = kit->msk;
> +
> +	spin_unlock_bh(&msk->pm.lock);
> +}
> +
> +__bpf_kfunc_end_defs();
> +
>   __diag_push();
>   __diag_ignore_all("-Wmissing-prototypes",
>   		  "kfuncs which will be used in BPF programs");
Re: [PATCH mptcp-next 1/4] bpf: Add mptcp_subflow bpf_iter
Posted by Andrii Nakryiko 2 months, 4 weeks ago
On Thu, Sep 5, 2024 at 11:25 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 9/5/24 6:52 AM, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> >
> > It's necessary to traverse all subflows on the conn_list of an MPTCP
> > socket and then call kfunc to modify the fields of each subflow. In
> > kernel space, mptcp_for_each_subflow() helper is used for this:
> >
> >   mptcp_for_each_subflow(msk, subflow)
> >           kfunc(subflow);
> >
> > But in the MPTCP BPF program, this has not yet been implemented. As
> > Martin suggested recently, this conn_list walking + modify-by-kfunc
> > usage fits the bpf_iter use case.
> >
> > This patch adds a new bpf_iter type named "mptcp_subflow" to do this.
> >
> > Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> >   kernel/bpf/helpers.c |  3 +++
> >   net/mptcp/bpf.c      | 57 ++++++++++++++++++++++++++++++++++++++++++++
> >   2 files changed, 60 insertions(+)
> >
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index b5f0adae8293..2340ba967444 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -3023,6 +3023,9 @@ BTF_ID_FLAGS(func, bpf_preempt_enable)
> >   BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
> >   BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
> >   BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
> > +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new)
> > +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next)
> > +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy)
> >   BTF_KFUNCS_END(common_btf_ids)
> >
> >   static const struct btf_kfunc_id_set common_kfunc_set = {
> > diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> > index 9672a70c24b0..cda09bbfd617 100644
> > --- a/net/mptcp/bpf.c
> > +++ b/net/mptcp/bpf.c
> > @@ -204,6 +204,63 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
> >       .set   = &bpf_mptcp_fmodret_ids,
> >   };
> >
> > +struct bpf_iter__mptcp_subflow {
> > +     __bpf_md_ptr(struct bpf_iter_meta *, meta);
> > +     __bpf_md_ptr(struct mptcp_sock *, msk);
> > +     __bpf_md_ptr(struct list_head *, pos);
> > +};
> > +
> > +DEFINE_BPF_ITER_FUNC(mptcp_subflow, struct bpf_iter_meta *meta,
> > +                  struct mptcp_sock *msk, struct list_head *pos)

This is (effectively) defining a BPF iterator *program type*, which is
different from an open-coded iterator. Do you need a BPF iterator program
type for this? Or would an open-coded iterator called from other BPF program
types be sufficient?

> > +
> > +struct bpf_iter_mptcp_subflow {
> > +     __u64 __opaque[3];
> > +} __attribute__((aligned(8)));
> > +
> > +struct bpf_iter_mptcp_subflow_kern {
> > +     struct mptcp_sock *msk;
> > +     struct list_head *pos;
> > +} __attribute__((aligned(8)));

opaque[3], but you are using two pointers here. Why the difference?

> > +
> > +__bpf_kfunc_start_defs();
> > +
> > +__bpf_kfunc int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it,
> > +                                        struct mptcp_sock *msk)
> > +{
> > +     struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> > +
> > +     kit->msk = msk;
> > +     kit->pos = &msk->conn_list;
> > +     spin_lock_bh(&msk->pm.lock);
>
> I don't think spin_lock here without unlock can be used. e.g. What if
> bpf_iter_mptcp_subflow_new() is called twice back-to-back.
>
> I haven't looked at the mptcp details, some questions:
> The list is protected by msk->pm.lock?
> What happen to the sk_lock of the msk?
> Can this be rcu-ify? or it needs some cares when walking the established TCP
> subflow?
>
>
> [ Please cc the bpf list. Helping to review patches is a good way to contribute
> back to the mailing list. ]
>
> > +
> > +     return 0;
> > +}
> > +
> > +__bpf_kfunc struct mptcp_subflow_context *
> > +bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it)
> > +{
> > +     struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> > +     struct mptcp_subflow_context *subflow;
> > +     struct mptcp_sock *msk = kit->msk;
> > +
> > +     subflow = list_entry((kit->pos)->next, struct mptcp_subflow_context, node);
> > +     if (list_entry_is_head(subflow, &msk->conn_list, node))
> > +             return NULL;
> > +
> > +     kit->pos = &subflow->node;
> > +     return subflow;
> > +}
> > +
> > +__bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it)
> > +{
> > +     struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> > +     struct mptcp_sock *msk = kit->msk;
> > +
> > +     spin_unlock_bh(&msk->pm.lock);
> > +}
> > +
> > +__bpf_kfunc_end_defs();
> > +
> >   __diag_push();
> >   __diag_ignore_all("-Wmissing-prototypes",
> >                 "kfuncs which will be used in BPF programs");
>
>
Re: [PATCH mptcp-next 1/4] bpf: Add mptcp_subflow bpf_iter
Posted by Geliang Tang 2 months, 3 weeks ago
Hi Andrii,

On Fri, 2024-09-06 at 14:29 -0700, Andrii Nakryiko wrote:
> On Thu, Sep 5, 2024 at 11:25 AM Martin KaFai Lau
> <martin.lau@linux.dev> wrote:
> > 
> > On 9/5/24 6:52 AM, Geliang Tang wrote:
> > > From: Geliang Tang <tanggeliang@kylinos.cn>
> > > 
> > > It's necessary to traverse all subflows on the conn_list of an
> > > MPTCP
> > > socket and then call kfunc to modify the fields of each subflow.
> > > In
> > > kernel space, mptcp_for_each_subflow() helper is used for this:
> > > 
> > >   mptcp_for_each_subflow(msk, subflow)
> > >           kfunc(subflow);
> > > 
> > > But in the MPTCP BPF program, this has not yet been implemented.
> > > As
> > > Martin suggested recently, this conn_list walking + modify-by-
> > > kfunc
> > > usage fits the bpf_iter use case.
> > > 
> > > This patch adds a new bpf_iter type named "mptcp_subflow" to do
> > > this.
> > > 
> > > Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > > ---
> > >   kernel/bpf/helpers.c |  3 +++
> > >   net/mptcp/bpf.c      | 57
> > > ++++++++++++++++++++++++++++++++++++++++++++
> > >   2 files changed, 60 insertions(+)
> > > 
> > > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > > index b5f0adae8293..2340ba967444 100644
> > > --- a/kernel/bpf/helpers.c
> > > +++ b/kernel/bpf/helpers.c
> > > @@ -3023,6 +3023,9 @@ BTF_ID_FLAGS(func, bpf_preempt_enable)
> > >   BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
> > >   BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT |
> > > KF_RET_NULL)
> > >   BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
> > > +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new)
> > > +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next)
> > > +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy)
> > >   BTF_KFUNCS_END(common_btf_ids)
> > > 
> > >   static const struct btf_kfunc_id_set common_kfunc_set = {
> > > diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> > > index 9672a70c24b0..cda09bbfd617 100644
> > > --- a/net/mptcp/bpf.c
> > > +++ b/net/mptcp/bpf.c
> > > @@ -204,6 +204,63 @@ static const struct btf_kfunc_id_set
> > > bpf_mptcp_fmodret_set = {
> > >       .set   = &bpf_mptcp_fmodret_ids,
> > >   };
> > > 
> > > +struct bpf_iter__mptcp_subflow {
> > > +     __bpf_md_ptr(struct bpf_iter_meta *, meta);
> > > +     __bpf_md_ptr(struct mptcp_sock *, msk);
> > > +     __bpf_md_ptr(struct list_head *, pos);
> > > +};
> > > +
> > > +DEFINE_BPF_ITER_FUNC(mptcp_subflow, struct bpf_iter_meta *meta,
> > > +                  struct mptcp_sock *msk, struct list_head *pos)
> 
> this is defining BPF iterator *program type* (effectively), which is
> different from open-coded iterator. Do you need a BPF iterator
> program
> type for this? Or open-coded iterator called from other BPF program
> types would be sufficient?

Yes, there is no need for DEFINE_BPF_ITER_FUNC here; I will drop it in v2.

> 
> > > +
> > > +struct bpf_iter_mptcp_subflow {
> > > +     __u64 __opaque[3];
> > > +} __attribute__((aligned(8)));
> > > +
> > > +struct bpf_iter_mptcp_subflow_kern {
> > > +     struct mptcp_sock *msk;
> > > +     struct list_head *pos;
> > > +} __attribute__((aligned(8)));
> 
> opaque[3], but you are using two pointers here. Why the difference?

It should be 2, not 3. I will update it in v2.

Thanks,
-Geliang

> 
> > > +
> > > +__bpf_kfunc_start_defs();
> > > +
> > > +__bpf_kfunc int bpf_iter_mptcp_subflow_new(struct
> > > bpf_iter_mptcp_subflow *it,
> > > +                                        struct mptcp_sock *msk)
> > > +{
> > > +     struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> > > +
> > > +     kit->msk = msk;
> > > +     kit->pos = &msk->conn_list;
> > > +     spin_lock_bh(&msk->pm.lock);
> > 
> > I don't think spin_lock here without unlock can be used. e.g. What
> > if
> > bpf_iter_mptcp_subflow_new() is called twice back-to-back.
> > 
> > I haven't looked at the mptcp details, some questions:
> > The list is protected by msk->pm.lock?
> > What happen to the sk_lock of the msk?
> > Can this be rcu-ify? or it needs some cares when walking the
> > established TCP
> > subflow?
> > 
> > 
> > [ Please cc the bpf list. Helping to review patches is a good way
> > to contribute
> > back to the mailing list. ]
> > 
> > > +
> > > +     return 0;
> > > +}
> > > +
> > > +__bpf_kfunc struct mptcp_subflow_context *
> > > +bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it)
> > > +{
> > > +     struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> > > +     struct mptcp_subflow_context *subflow;
> > > +     struct mptcp_sock *msk = kit->msk;
> > > +
> > > +     subflow = list_entry((kit->pos)->next, struct
> > > mptcp_subflow_context, node);
> > > +     if (list_entry_is_head(subflow, &msk->conn_list, node))
> > > +             return NULL;
> > > +
> > > +     kit->pos = &subflow->node;
> > > +     return subflow;
> > > +}
> > > +
> > > +__bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct
> > > bpf_iter_mptcp_subflow *it)
> > > +{
> > > +     struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> > > +     struct mptcp_sock *msk = kit->msk;
> > > +
> > > +     spin_unlock_bh(&msk->pm.lock);
> > > +}
> > > +
> > > +__bpf_kfunc_end_defs();
> > > +
> > >   __diag_push();
> > >   __diag_ignore_all("-Wmissing-prototypes",
> > >                 "kfuncs which will be used in BPF programs");
> > 
> > 

Re: [PATCH mptcp-next 1/4] bpf: Add mptcp_subflow bpf_iter
Posted by Geliang Tang 2 months, 4 weeks ago
Hi Martin,

On Thu, 2024-09-05 at 11:24 -0700, Martin KaFai Lau wrote:
> On 9/5/24 6:52 AM, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> > 
> > It's necessary to traverse all subflows on the conn_list of an
> > MPTCP
> > socket and then call kfunc to modify the fields of each subflow. In
> > kernel space, mptcp_for_each_subflow() helper is used for this:
> > 
> >   mptcp_for_each_subflow(msk, subflow)
> >           kfunc(subflow);
> > 
> > But in the MPTCP BPF program, this has not yet been implemented. As
> > Martin suggested recently, this conn_list walking + modify-by-kfunc
> > usage fits the bpf_iter use case.
> > 
> > This patch adds a new bpf_iter type named "mptcp_subflow" to do
> > this.
> > 
> > Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> >   kernel/bpf/helpers.c |  3 +++
> >   net/mptcp/bpf.c      | 57
> > ++++++++++++++++++++++++++++++++++++++++++++
> >   2 files changed, 60 insertions(+)
> > 
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index b5f0adae8293..2340ba967444 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -3023,6 +3023,9 @@ BTF_ID_FLAGS(func, bpf_preempt_enable)
> >   BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
> >   BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT |
> > KF_RET_NULL)
> >   BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
> > +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new)
> > +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next)
> > +BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy)
> >   BTF_KFUNCS_END(common_btf_ids)
> >   
> >   static const struct btf_kfunc_id_set common_kfunc_set = {
> > diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> > index 9672a70c24b0..cda09bbfd617 100644
> > --- a/net/mptcp/bpf.c
> > +++ b/net/mptcp/bpf.c
> > @@ -204,6 +204,63 @@ static const struct btf_kfunc_id_set
> > bpf_mptcp_fmodret_set = {
> >    .set   = &bpf_mptcp_fmodret_ids,
> >   };
> >   
> > +struct bpf_iter__mptcp_subflow {
> > + __bpf_md_ptr(struct bpf_iter_meta *, meta);
> > + __bpf_md_ptr(struct mptcp_sock *, msk);
> > + __bpf_md_ptr(struct list_head *, pos);
> > +};
> > +
> > +DEFINE_BPF_ITER_FUNC(mptcp_subflow, struct bpf_iter_meta *meta,
> > +      struct mptcp_sock *msk, struct list_head *pos)
> > +
> > +struct bpf_iter_mptcp_subflow {
> > + __u64 __opaque[3];
> > +} __attribute__((aligned(8)));
> > +
> > +struct bpf_iter_mptcp_subflow_kern {
> > + struct mptcp_sock *msk;
> > + struct list_head *pos;
> > +} __attribute__((aligned(8)));
> > +
> > +__bpf_kfunc_start_defs();
> > +
> > +__bpf_kfunc int bpf_iter_mptcp_subflow_new(struct
> > bpf_iter_mptcp_subflow *it,
> > +    struct mptcp_sock *msk)
> > +{
> > + struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> > +
> > + kit->msk = msk;
> > + kit->pos = &msk->conn_list;
> > + spin_lock_bh(&msk->pm.lock);
> 
> I don't think spin_lock here without unlock can be used. e.g. What if
> bpf_iter_mptcp_subflow_new() is called twice back-to-back.
> 
> I haven't looked at the mptcp details, some questions:
> The list is protected by msk->pm.lock?
> What happen to the sk_lock of the msk?
> Can this be rcu-ify? or it needs some cares when walking the
> established TCP 
> subflow?

Thank you for your review. msk->pm.lock shouldn't be used here: the
conn_list is not protected by msk->pm.lock. I will remove this lock in v2.

> 
> 
> [ Please cc the bpf list. Helping to review patches is a good way to
> contribute 
> back to the mailing list. ]

This patch is for "mptcp-next"; it depends on the "new MPTCP subflow
subtest", which is under review on the bpf list. We will send it to the
bpf list very soon.

Thanks,
-Geliang

> 
> > +
> > + return 0;
> > +}
> > +
> > +__bpf_kfunc struct mptcp_subflow_context *
> > +bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it)
> > +{
> > + struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> > + struct mptcp_subflow_context *subflow;
> > + struct mptcp_sock *msk = kit->msk;
> > +
> > + subflow = list_entry((kit->pos)->next, struct
> > mptcp_subflow_context, node);
> > + if (list_entry_is_head(subflow, &msk->conn_list, node))
> > + return NULL;
> > +
> > + kit->pos = &subflow->node;
> > + return subflow;
> > +}
> > +
> > +__bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct
> > bpf_iter_mptcp_subflow *it)
> > +{
> > + struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
> > + struct mptcp_sock *msk = kit->msk;
> > +
> > + spin_unlock_bh(&msk->pm.lock);
> > +}
> > +
> > +__bpf_kfunc_end_defs();
> > +
> >   __diag_push();
> >   __diag_ignore_all("-Wmissing-prototypes",
> >      "kfuncs which will be used in BPF programs");
>