From: Geliang Tang <tanggeliang@kylinos.cn>
This patch adds a "cgroup/getsockopt" way to inspect the subflows of a
mptcp socket.
mptcp_for_each_subflow() and other helpers related to list_entry are
added to progs/mptcp_bpf.h.
Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
bpf_core_cast to cast a pointer to tcp_sock for read-only access. This
makes it possible to inspect all the fields in a tcp_sock.
Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++++
.../selftests/bpf/progs/mptcp_subflow.c | 69 +++++++++++++++++++
2 files changed, 96 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
index 782f36ed027e..92d5deed0214 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -4,9 +4,36 @@
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
#define MPTCP_SUBFLOWS_MAX 8
+static inline int list_is_head(const struct list_head *list,
+ const struct list_head *head)
+{
+ return list == head;
+}
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+#define list_next_entry(pos, member) \
+ list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_entry_is_head(pos, head, member) \
+ list_is_head(&pos->member, (head))
+
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_first_entry(head, typeof(*pos), member); \
+ cond_break, !list_entry_is_head(pos, head, member); \
+ pos = list_next_entry(pos, member))
+
+#define mptcp_for_each_subflow(__msk, __subflow) \
+ list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+
extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
bool scheduled) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index 2e28f4a215b5..70302477e326 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -4,10 +4,12 @@
/* vmlinux.h, bpf_helpers.h and other 'define' */
#include "bpf_tracing_net.h"
+#include "mptcp_bpf.h"
char _license[] SEC("license") = "GPL";
char cc[TCP_CA_NAME_MAX] = "reno";
+int pid;
/* Associate a subflow counter to each token */
struct {
@@ -57,3 +59,70 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
return 1;
}
+
+static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+ struct mptcp_subflow_context *subflow;
+ int i = 0;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+
+ if (ssk->sk_mark != ++i) {
+ ctx->retval = -2;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+ struct mptcp_subflow_context *subflow;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct inet_connection_sock *icsk;
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+ icsk = bpf_core_cast(ssk, struct inet_connection_sock);
+
+ if (ssk->sk_mark == 2 &&
+ __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) {
+ ctx->retval = -2;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt_subflow(struct bpf_sockopt *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ struct mptcp_sock *msk;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ if (!sk || sk->protocol != IPPROTO_MPTCP ||
+ (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) &&
+ !(ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)))
+ return 1;
+
+ msk = bpf_core_cast(sk, struct mptcp_sock);
+ if (msk->pm.subflows != 1) {
+ ctx->retval = -1;
+ return 1;
+ }
+
+ if (ctx->optname == SO_MARK)
+ return _check_getsockopt_subflow_mark(msk, ctx);
+ return _check_getsockopt_subflow_cc(msk, ctx);
+}
--
2.43.0
Hi Matt,
On Thu, 2024-09-05 at 10:26 +0800, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
>
> This patch adds a "cgroup/getsockopt" way to inspect the subflows of
> a
> mptcp socket.
>
> mptcp_for_each_stubflow() and other helpers related to list_dentry
> are
> added into progs/mptcp_bpf.h.
>
> Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and
> use
> bpf_core_cast to cast a pointer to tcp_sock for readonly. It will
> allow
> to inspect all the fields in a tcp_sock.
>
> Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
> tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++++
> .../selftests/bpf/progs/mptcp_subflow.c | 69
> +++++++++++++++++++
> 2 files changed, 96 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> index 782f36ed027e..92d5deed0214 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> @@ -4,9 +4,36 @@
>
> #include <vmlinux.h>
> #include <bpf/bpf_core_read.h>
> +#include "bpf_experimental.h"
>
> #define MPTCP_SUBFLOWS_MAX 8
>
> +static inline int list_is_head(const struct list_head *list,
> + const struct list_head *head)
> +{
> + return list == head;
> +}
> +
> +#define list_entry(ptr, type,
> member) \
> + container_of(ptr, type, member)
> +
> +#define list_first_entry(ptr, type,
> member) \
> + list_entry((ptr)->next, type, member)
> +
> +#define list_next_entry(pos,
> member) \
> + list_entry((pos)->member.next, typeof(*(pos)), member)
> +
> +#define list_entry_is_head(pos, head,
> member) \
> + list_is_head(&pos->member, (head))
> +
> +#define list_for_each_entry(pos, head,
> member) \
> + for (pos = list_first_entry(head, typeof(*pos),
> member); \
> + cond_break, !list_entry_is_head(pos, head,
> member); \
> + pos = list_next_entry(pos, member))
> +
> +#define mptcp_for_each_subflow(__msk,
> __subflow) \
> + list_for_each_entry(__subflow, &((__msk)->conn_list), node)
> +
The helper mptcp_subflow_tcp_sock(), added by the commit "selftests/bpf:
Add bpf_rr scheduler & test", needs to be added to this commit:
/* Return the socket pointer stored in the subflow context's tcp_sock field. */
static __always_inline struct sock *
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
{
return subflow->tcp_sock;
}
Otherwise, compilation errors occur.
Thanks,
-Geliang
> extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context
> *subflow,
> bool scheduled) __ksym;
>
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> index 2e28f4a215b5..70302477e326 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> @@ -4,10 +4,12 @@
>
> /* vmlinux.h, bpf_helpers.h and other 'define' */
> #include "bpf_tracing_net.h"
> +#include "mptcp_bpf.h"
>
> char _license[] SEC("license") = "GPL";
>
> char cc[TCP_CA_NAME_MAX] = "reno";
> +int pid;
>
> /* Associate a subflow counter to each token */
> struct {
> @@ -57,3 +59,70 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
>
> return 1;
> }
> +
> +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk,
> struct bpf_sockopt *ctx)
> +{
> + struct mptcp_subflow_context *subflow;
> + int i = 0;
> +
> + mptcp_for_each_subflow(msk, subflow) {
> + struct sock *ssk;
> +
> + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> + struct
> mptcp_subflow_context));
> +
> + if (ssk->sk_mark != ++i) {
> + ctx->retval = -2;
> + break;
> + }
> + }
> +
> + return 1;
> +}
> +
> +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk,
> struct bpf_sockopt *ctx)
> +{
> + struct mptcp_subflow_context *subflow;
> +
> + mptcp_for_each_subflow(msk, subflow) {
> + struct inet_connection_sock *icsk;
> + struct sock *ssk;
> +
> + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> + struct
> mptcp_subflow_context));
> + icsk = bpf_core_cast(ssk, struct
> inet_connection_sock);
> +
> + if (ssk->sk_mark == 2 &&
> + __builtin_memcmp(icsk->icsk_ca_ops->name, cc,
> TCP_CA_NAME_MAX)) {
> + ctx->retval = -2;
> + break;
> + }
> + }
> +
> + return 1;
> +}
> +
> +SEC("cgroup/getsockopt")
> +int _getsockopt_subflow(struct bpf_sockopt *ctx)
> +{
> + struct bpf_sock *sk = ctx->sk;
> + struct mptcp_sock *msk;
> +
> + if (bpf_get_current_pid_tgid() >> 32 != pid)
> + return 1;
> +
> + if (!sk || sk->protocol != IPPROTO_MPTCP ||
> + (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK)
> &&
> + !(ctx->level == SOL_TCP && ctx->optname ==
> TCP_CONGESTION)))
> + return 1;
> +
> + msk = bpf_core_cast(sk, struct mptcp_sock);
> + if (msk->pm.subflows != 1) {
> + ctx->retval = -1;
> + return 1;
> + }
> +
> + if (ctx->optname == SO_MARK)
> + return _check_getsockopt_subflow_mark(msk, ctx);
> + return _check_getsockopt_subflow_cc(msk, ctx);
> +}
Hi Geliang,
On 05/09/2024 10:06, Geliang Tang wrote:
> Hi Matt,
>
> On Thu, 2024-09-05 at 10:26 +0800, Geliang Tang wrote:
>> From: Geliang Tang <tanggeliang@kylinos.cn>
>>
>> This patch adds a "cgroup/getsockopt" way to inspect the subflows of
>> a
>> mptcp socket.
>>
>> mptcp_for_each_stubflow() and other helpers related to list_dentry
>> are
>> added into progs/mptcp_bpf.h.
>>
>> Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and
>> use
>> bpf_core_cast to cast a pointer to tcp_sock for readonly. It will
>> allow
>> to inspect all the fields in a tcp_sock.
>>
>> Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
>> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
>> ---
>> tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++++
>> .../selftests/bpf/progs/mptcp_subflow.c | 69
>> +++++++++++++++++++
>> 2 files changed, 96 insertions(+)
>>
>> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>> b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>> index 782f36ed027e..92d5deed0214 100644
>> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>> @@ -4,9 +4,36 @@
>>
>> #include <vmlinux.h>
>> #include <bpf/bpf_core_read.h>
>> +#include "bpf_experimental.h"
>>
>> #define MPTCP_SUBFLOWS_MAX 8
>>
>> +static inline int list_is_head(const struct list_head *list,
>> + const struct list_head *head)
>> +{
>> + return list == head;
>> +}
>> +
>> +#define list_entry(ptr, type,
>> member) \
>> + container_of(ptr, type, member)
>> +
>> +#define list_first_entry(ptr, type,
>> member) \
>> + list_entry((ptr)->next, type, member)
>> +
>> +#define list_next_entry(pos,
>> member) \
>> + list_entry((pos)->member.next, typeof(*(pos)), member)
>> +
>> +#define list_entry_is_head(pos, head,
>> member) \
>> + list_is_head(&pos->member, (head))
>> +
>> +#define list_for_each_entry(pos, head,
>> member) \
>> + for (pos = list_first_entry(head, typeof(*pos),
>> member); \
>> + cond_break, !list_entry_is_head(pos, head,
>> member); \
>> + pos = list_next_entry(pos, member))
>> +
>> +#define mptcp_for_each_subflow(__msk,
>> __subflow) \
>> + list_for_each_entry(__subflow, &((__msk)->conn_list), node)
>> +
>
> The helper mptcp_subflow_tcp_sock() added by the commit "selftests/bpf:
> Add bpf_rr scheduler & test" needs be added into this commit:
>
> static __always_inline struct sock *
> mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
> {
> return subflow->tcp_sock;
> }
>
> Otherwise, compiling errors occur.
Thank you for the heads-up. Please note that the file doesn't exist in
net-next, so we need to add it, and update the MAINTAINERS file. I'm
fixing that. While at it, I'm also adding comments in the mptcp_bpf.h
file about where the helpers come from, and about the modifications
you made on top. That should help with maintenance.
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
Hi Matt,
On Thu, 2024-09-05 at 16:06 +0800, Geliang Tang wrote:
> Hi Matt,
>
> On Thu, 2024-09-05 at 10:26 +0800, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> >
> > This patch adds a "cgroup/getsockopt" way to inspect the subflows
> > of
> > a
This should be "an mptcp socket", not "a mptcp socket". Please update
this for me when merging it.
Thanks,
-Geliang
> > mptcp socket.
> >
> > mptcp_for_each_stubflow() and other helpers related to list_dentry
> > are
> > added into progs/mptcp_bpf.h.
> >
> > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list
> > and
> > use
> > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will
> > allow
> > to inspect all the fields in a tcp_sock.
> >
> > Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> > tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++++
> > .../selftests/bpf/progs/mptcp_subflow.c | 69
> > +++++++++++++++++++
> > 2 files changed, 96 insertions(+)
> >
> > diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > index 782f36ed027e..92d5deed0214 100644
> > --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > @@ -4,9 +4,36 @@
> >
> > #include <vmlinux.h>
> > #include <bpf/bpf_core_read.h>
> > +#include "bpf_experimental.h"
> >
> > #define MPTCP_SUBFLOWS_MAX 8
> >
> > +static inline int list_is_head(const struct list_head *list,
> > + const struct list_head *head)
> > +{
> > + return list == head;
> > +}
> > +
> > +#define list_entry(ptr, type,
> > member) \
> > + container_of(ptr, type, member)
> > +
> > +#define list_first_entry(ptr, type,
> > member) \
> > + list_entry((ptr)->next, type, member)
> > +
> > +#define list_next_entry(pos,
> > member) \
> > + list_entry((pos)->member.next, typeof(*(pos)), member)
> > +
> > +#define list_entry_is_head(pos, head,
> > member) \
> > + list_is_head(&pos->member, (head))
> > +
> > +#define list_for_each_entry(pos, head,
> > member) \
> > + for (pos = list_first_entry(head, typeof(*pos),
> > member); \
> > + cond_break, !list_entry_is_head(pos, head,
> > member); \
> > + pos = list_next_entry(pos, member))
> > +
> > +#define mptcp_for_each_subflow(__msk,
> > __subflow) \
> > + list_for_each_entry(__subflow, &((__msk)->conn_list),
> > node)
> > +
>
> The helper mptcp_subflow_tcp_sock() added by the commit
> "selftests/bpf:
> Add bpf_rr scheduler & test" needs be added into this commit:
>
> static __always_inline struct sock *
> mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
> {
> return subflow->tcp_sock;
> }
>
> Otherwise, compiling errors occur.
>
> Thanks,
> -Geliang
>
> > extern void mptcp_subflow_set_scheduled(struct
> > mptcp_subflow_context
> > *subflow,
> > bool scheduled) __ksym;
> >
> > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > index 2e28f4a215b5..70302477e326 100644
> > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > @@ -4,10 +4,12 @@
> >
> > /* vmlinux.h, bpf_helpers.h and other 'define' */
> > #include "bpf_tracing_net.h"
> > +#include "mptcp_bpf.h"
> >
> > char _license[] SEC("license") = "GPL";
> >
> > char cc[TCP_CA_NAME_MAX] = "reno";
> > +int pid;
> >
> > /* Associate a subflow counter to each token */
> > struct {
> > @@ -57,3 +59,70 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
> >
> > return 1;
> > }
> > +
> > +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk,
> > struct bpf_sockopt *ctx)
> > +{
> > + struct mptcp_subflow_context *subflow;
> > + int i = 0;
> > +
> > + mptcp_for_each_subflow(msk, subflow) {
> > + struct sock *ssk;
> > +
> > + ssk =
> > mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> > + struct
> > mptcp_subflow_context));
> > +
> > + if (ssk->sk_mark != ++i) {
> > + ctx->retval = -2;
> > + break;
> > + }
> > + }
> > +
> > + return 1;
> > +}
> > +
> > +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk,
> > struct bpf_sockopt *ctx)
> > +{
> > + struct mptcp_subflow_context *subflow;
> > +
> > + mptcp_for_each_subflow(msk, subflow) {
> > + struct inet_connection_sock *icsk;
> > + struct sock *ssk;
> > +
> > + ssk =
> > mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> > + struct
> > mptcp_subflow_context));
> > + icsk = bpf_core_cast(ssk, struct
> > inet_connection_sock);
> > +
> > + if (ssk->sk_mark == 2 &&
> > + __builtin_memcmp(icsk->icsk_ca_ops->name, cc,
> > TCP_CA_NAME_MAX)) {
> > + ctx->retval = -2;
> > + break;
> > + }
> > + }
> > +
> > + return 1;
> > +}
> > +
> > +SEC("cgroup/getsockopt")
> > +int _getsockopt_subflow(struct bpf_sockopt *ctx)
> > +{
> > + struct bpf_sock *sk = ctx->sk;
> > + struct mptcp_sock *msk;
> > +
> > + if (bpf_get_current_pid_tgid() >> 32 != pid)
> > + return 1;
> > +
> > + if (!sk || sk->protocol != IPPROTO_MPTCP ||
> > + (!(ctx->level == SOL_SOCKET && ctx->optname ==
> > SO_MARK)
> > &&
> > + !(ctx->level == SOL_TCP && ctx->optname ==
> > TCP_CONGESTION)))
> > + return 1;
> > +
> > + msk = bpf_core_cast(sk, struct mptcp_sock);
> > + if (msk->pm.subflows != 1) {
> > + ctx->retval = -1;
> > + return 1;
> > + }
> > +
> > + if (ctx->optname == SO_MARK)
> > + return _check_getsockopt_subflow_mark(msk, ctx);
> > + return _check_getsockopt_subflow_cc(msk, ctx);
> > +}
>
>
© 2016 - 2026 Red Hat, Inc.