[PATCH mptcp-next v3 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Geliang Tang posted 2 patches 3 months ago
There is a newer version of this series
[PATCH mptcp-next v3 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow
Posted by Geliang Tang 3 months ago
From: Geliang Tang <tanggeliang@kylinos.cn>

This patch adds a "cgroup/getsockopt" way to inspect the subflows of a
mptcp socket.

mptcp_for_each_stubflow() and other helpers related to list_dentry are
added into progs/mptcp_bpf.h.

Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
bpf_core_cast to cast a pointer to tcp_sock for readonly. It will allow
to inspect all the fields in a tcp_sock.

Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 .../testing/selftests/bpf/prog_tests/mptcp.c  | 23 ++++++++
 tools/testing/selftests/bpf/progs/mptcp_bpf.h | 35 ++++++++++++
 .../selftests/bpf/progs/mptcp_subflow.c       | 55 +++++++++++++++++++
 3 files changed, 113 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index d4c5209fbfaf..143ed1c756e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -371,6 +371,10 @@ static int endpoint_init(char *flags)
 static void run_subflow(void)
 {
 	int server_fd, client_fd;
+	char cc[TCP_CA_NAME_MAX];
+	unsigned int mark;
+	socklen_t len;
+	int err;
 
 	server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0);
 	if (!ASSERT_GE(server_fd, 0, "start_mptcp_server"))
@@ -382,6 +386,18 @@ static void run_subflow(void)
 
 	send_byte(client_fd);
 
+	sleep(0.1);
+
+	len = sizeof(mark);
+	err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, &len);
+	if (!ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)"))
+		goto close_client;
+
+	len = sizeof(cc);
+	err = getsockopt(client_fd, SOL_TCP, TCP_CONGESTION, cc, &len);
+	ASSERT_OK(err, "getsockopt(client_fd, TCP_CONGESTION)");
+
+close_client:
 	close(client_fd);
 close_server:
 	close(server_fd);
@@ -392,6 +408,7 @@ static void test_subflow(void)
 	int cgroup_fd, prog_fd, err;
 	struct mptcp_subflow *skel;
 	struct nstoken *nstoken;
+	struct bpf_link *link;
 
 	cgroup_fd = test__join_cgroup("/mptcp_subflow");
 	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup: mptcp_subflow"))
@@ -417,6 +434,11 @@ static void test_subflow(void)
 	if (endpoint_init("subflow") < 0)
 		goto close_netns;
 
+	link = bpf_program__attach_cgroup(skel->progs._getsockopt_subflow,
+					  cgroup_fd);
+	if (!ASSERT_OK_PTR(link, "getsockopt prog"))
+		goto close_netns;
+
 	run_subflow();
 
 close_netns:
@@ -425,6 +447,7 @@ static void test_subflow(void)
 	mptcp_subflow__destroy(skel);
 close_cgroup:
 	close(cgroup_fd);
+	bpf_link__destroy(link);
 }
 
 static struct nstoken *sched_init(char *flags, char *sched)
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
index 782f36ed027e..70d9f5888976 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -4,9 +4,44 @@
 
 #include <vmlinux.h>
 #include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
 
 #define MPTCP_SUBFLOWS_MAX 8
 
+static inline int list_is_head(const struct list_head *list,
+			       const struct list_head *head)
+{
+	return list == head;
+}
+
+#define list_entry(ptr, type, member)					\
+	container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member)				\
+	list_entry((ptr)->next, type, member)
+
+#define list_next_entry(pos, member)					\
+	list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_entry_is_head(pos, head, member)				\
+	list_is_head(&pos->member, (head))
+
+#define list_for_each_entry(pos, head, member)				\
+	for (pos = list_first_entry(head, typeof(*pos), member);	\
+	     !list_entry_is_head(pos, head, member);			\
+	     cond_break, pos = list_next_entry(pos, member))
+
+#define list_for_each_entry_safe(pos, n, head, member)			\
+	for (pos = list_first_entry(head, typeof(*pos), member),	\
+		n = list_next_entry(pos, member);			\
+	     !list_entry_is_head(pos, head, member);			\
+	     cond_break, pos = n, n = list_next_entry(n, member))
+
+#define mptcp_for_each_subflow(__msk, __subflow)			\
+	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+#define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp)		\
+	list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
+
 extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
 					bool scheduled) __ksym;
 
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index bc572e1d6df8..e8cc157278d2 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -4,6 +4,7 @@
 
 /* vmlinux.h, bpf_helpers.h and other 'define' */
 #include "bpf_tracing_net.h"
+#include "mptcp_bpf.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -57,3 +58,57 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
 
 	return 1;
 }
+
+static int _check_getsockopt_subflows_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+	struct mptcp_subflow_context *subflow;
+	int i = 0;
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk;
+
+		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+							   struct mptcp_subflow_context));
+
+		if (ssk->sk_mark != ++i)
+			ctx->retval = -1;
+	}
+
+	return 1;
+}
+
+static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+	struct mptcp_subflow_context *subflow, *tmp;
+
+	mptcp_for_each_subflow_safe(msk, subflow, tmp) {
+		struct inet_connection_sock *icsk;
+		struct sock *ssk;
+
+		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+							   struct mptcp_subflow_context));
+		icsk = bpf_core_cast(ssk, struct inet_connection_sock);
+
+		if (ssk->sk_mark == 1 &&
+		    __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX))
+			ctx->retval = -1;
+	}
+
+	return 1;
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt_subflow(struct bpf_sockopt *ctx)
+{
+	struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct mptcp_sock);
+
+	if (!msk || !msk->token)
+		return 1;
+
+	if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK)
+		return _check_getsockopt_subflows_mark(msk, ctx);
+	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)
+		return _check_getsockopt_subflow_cc(msk, ctx);
+
+	return 1;
+}
-- 
2.43.0
Re: [PATCH mptcp-next v3 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow
Posted by Matthieu Baerts 3 months ago
Hi Geliang,

Thank you for the update!

On 22/08/2024 11:22, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
> 
> This patch adds a "cgroup/getsockopt" way to inspect the subflows of a
> mptcp socket.
> 
> mptcp_for_each_stubflow() and other helpers related to list_dentry are
> added into progs/mptcp_bpf.h.
> 
> Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
> bpf_core_cast to cast a pointer to tcp_sock for readonly. It will allow
> to inspect all the fields in a tcp_sock.

Should this not be squashed in "selftests/bpf: Add mptcp subflow
subtest" as well?
> 
> Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
>  .../testing/selftests/bpf/prog_tests/mptcp.c  | 23 ++++++++
>  tools/testing/selftests/bpf/progs/mptcp_bpf.h | 35 ++++++++++++
>  .../selftests/bpf/progs/mptcp_subflow.c       | 55 +++++++++++++++++++
>  3 files changed, 113 insertions(+)
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
> index d4c5209fbfaf..143ed1c756e1 100644
> --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
> +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
> @@ -371,6 +371,10 @@ static int endpoint_init(char *flags)
>  static void run_subflow(void)
>  {
>  	int server_fd, client_fd;
> +	char cc[TCP_CA_NAME_MAX];
> +	unsigned int mark;
> +	socklen_t len;
> +	int err;
>  
>  	server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0);
>  	if (!ASSERT_GE(server_fd, 0, "start_mptcp_server"))
> @@ -382,6 +386,18 @@ static void run_subflow(void)
>  
>  	send_byte(client_fd);
>  
> +	sleep(0.1);

Why do you need a sleep here? To make sure the extra subflow has been
created? It sounds safer to wait for an event here, no? Maybe simply with:

  #ifndef SOL_MPTCP
  #define SOL_MPTCP 284
  #endif

  #ifndef MPTCP_INFO
  #define MPTCP_INFO 1
  #endif

  (...)

  u8 subflows;
  int i;

  (...)

  len = sizeof(subflows);

  /* Wait max 1 sec for new subflows to be created */
  for (i = 0; i < 10; i++) {
      err = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &subflows, &len);

      if (!err && subflows > 0)
          break;

      sleep(0.1);
  }

WDYT? It might make sense to add this in a new helper:

  wait_for_new_subflows(client_fd);


Also, just to be sure, when does the BPF program set the socket options
per subflow? When the subflows are created? Just to be sure it is always
done before this call to 'getsockopt()', and only once.

> +
> +	len = sizeof(mark);
> +	err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, &len);
> +	if (!ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)"))

Should we also check the value of 'mark' here?

> +		goto close_client;

Maybe it is not needed to stop earlier here, we can also check the value
of the TCP CC and have info about the two checks? But maybe we need to
return an error if this getsockopt() fails, but not the next one.

  err = getsockopt(TCP_CONGESTION) || err;

> +	len = sizeof(cc);
> +	err = getsockopt(client_fd, SOL_TCP, TCP_CONGESTION, cc, &len);
> +	ASSERT_OK(err, "getsockopt(client_fd, TCP_CONGESTION)");

Here as well, we could check that 'cc' is the same as 'skel->data->cc',
because MPTCP will return the value from the first subflow.

Or, we change the BPF test to set a different TCP CC only on the second
subflow, then we compare this getsockopt() with the default value from
there:

  /proc/sys/net/ipv4/tcp_congestion_control

The validation in BPF will then check the value of the second subflow
and it should be 'reno'.


> +
> +close_client:
>  	close(client_fd);
>  close_server:
>  	close(server_fd);
> @@ -392,6 +408,7 @@ static void test_subflow(void)
>  	int cgroup_fd, prog_fd, err;
>  	struct mptcp_subflow *skel;
>  	struct nstoken *nstoken;
> +	struct bpf_link *link;
>  
>  	cgroup_fd = test__join_cgroup("/mptcp_subflow");
>  	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup: mptcp_subflow"))
> @@ -417,6 +434,11 @@ static void test_subflow(void)
>  	if (endpoint_init("subflow") < 0)
>  		goto close_netns;
>  
> +	link = bpf_program__attach_cgroup(skel->progs._getsockopt_subflow,
> +					  cgroup_fd);
> +	if (!ASSERT_OK_PTR(link, "getsockopt prog"))
> +		goto close_netns;
> +
>  	run_subflow();
>  
>  close_netns:
> @@ -425,6 +447,7 @@ static void test_subflow(void)
>  	mptcp_subflow__destroy(skel);
>  close_cgroup:
>  	close(cgroup_fd);
> +	bpf_link__destroy(link);
>  }
>  
>  static struct nstoken *sched_init(char *flags, char *sched)
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> index 782f36ed027e..70d9f5888976 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> @@ -4,9 +4,44 @@
>  
>  #include <vmlinux.h>
>  #include <bpf/bpf_core_read.h>
> +#include "bpf_experimental.h"
>  
>  #define MPTCP_SUBFLOWS_MAX 8
>  
> +static inline int list_is_head(const struct list_head *list,
> +			       const struct list_head *head)
> +{
> +	return list == head;
> +}
> +
> +#define list_entry(ptr, type, member)					\
> +	container_of(ptr, type, member)
> +
> +#define list_first_entry(ptr, type, member)				\
> +	list_entry((ptr)->next, type, member)
> +
> +#define list_next_entry(pos, member)					\
> +	list_entry((pos)->member.next, typeof(*(pos)), member)
> +
> +#define list_entry_is_head(pos, head, member)				\
> +	list_is_head(&pos->member, (head))
> +
> +#define list_for_each_entry(pos, head, member)				\
> +	for (pos = list_first_entry(head, typeof(*pos), member);	\
> +	     !list_entry_is_head(pos, head, member);			\

I guess you cannot add 'cond_break' in the condition, because that's the
equivalent of 'break', right?

Also, just to be sure: can this 'cond_break' be used on all
architectures? If not, maybe best to have a "list_for_each_entry"
version that is specific to MPTCP subflows, with a "i < MPTCP_SUBFLOWS_MAX".

> +	     cond_break, pos = list_next_entry(pos, member))
> +
> +#define list_for_each_entry_safe(pos, n, head, member)			\
> +	for (pos = list_first_entry(head, typeof(*pos), member),	\
> +		n = list_next_entry(pos, member);			\
> +	     !list_entry_is_head(pos, head, member);			\

Same here for 'cond_break'.

> +	     cond_break, pos = n, n = list_next_entry(n, member))
> +
> +#define mptcp_for_each_subflow(__msk, __subflow)			\
> +	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
> +#define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp)		\
> +	list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
> +
>  extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
>  					bool scheduled) __ksym;
>  
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> index bc572e1d6df8..e8cc157278d2 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> @@ -4,6 +4,7 @@
>  
>  /* vmlinux.h, bpf_helpers.h and other 'define' */
>  #include "bpf_tracing_net.h"
> +#include "mptcp_bpf.h"
>  
>  char _license[] SEC("license") = "GPL";
>  
> @@ -57,3 +58,57 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
>  
>  	return 1;
>  }
> +
> +static int _check_getsockopt_subflows_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
> +{
> +	struct mptcp_subflow_context *subflow;
> +	int i = 0;
> +
> +	mptcp_for_each_subflow(msk, subflow) {
> +		struct sock *ssk;
> +
> +		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> +							   struct mptcp_subflow_context));
> +
> +		if (ssk->sk_mark != ++i)
> +			ctx->retval = -1;
> +	}

I think you should check here that 'i == 2', otherwise we are not
checking all the expected subflows.

> +
> +	return 1;
> +}
> +
> +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
> +{
> +	struct mptcp_subflow_context *subflow, *tmp;
> +
> +	mptcp_for_each_subflow_safe(msk, subflow, tmp) {
> +		struct inet_connection_sock *icsk;
> +		struct sock *ssk;
> +
> +		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> +							   struct mptcp_subflow_context));
> +		icsk = bpf_core_cast(ssk, struct inet_connection_sock);
> +
> +		if (ssk->sk_mark == 1 &&
> +		    __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX))
> +			ctx->retval = -1;
> +	}

Same here, you should check the number of subflows that have been looked
at (2), otherwise the check might succeed, while not doing any or half
of the validation.

I think here it is better if the CC is modified on the second subflow,
so we iterate at least 2 times, and expect to find the modified CC. WDYT?

> +
> +	return 1;
> +}
> +
> +SEC("cgroup/getsockopt")
> +int _getsockopt_subflow(struct bpf_sockopt *ctx)
> +{
> +	struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct mptcp_sock);
> +
> +	if (!msk || !msk->token)

Instead of checking the token, should you not check that

  sk->sk_protocol == IPPROTO_MPTCP

no?

Just checking if there is something written at "*msk + token offset"
looks a bit random to me, and not safe.

> +		return 1;
> +
> +	if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK)
> +		return _check_getsockopt_subflows_mark(msk, ctx);
> +	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)
> +		return _check_getsockopt_subflow_cc(msk, ctx);
> +
> +	return 1;
> +}

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.
Re: [PATCH mptcp-next v3 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow
Posted by Geliang Tang 3 months ago
On Thu, 2024-08-22 at 12:56 +0200, Matthieu Baerts wrote:
> Hi Geliang,
> 
> Thank you for the update!
> 
> On 22/08/2024 11:22, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> > 
> > This patch adds a "cgroup/getsockopt" way to inspect the subflows
> > of a
> > mptcp socket.
> > 
> > mptcp_for_each_stubflow() and other helpers related to list_dentry
> > are
> > added into progs/mptcp_bpf.h.
> > 
> > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list
> > and use
> > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will
> > allow
> > to inspect all the fields in a tcp_sock.
> 
> Should this not be squashed in "selftests/bpf: Add mptcp subflow
> subtest" as well?
> > 
> > Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> >  .../testing/selftests/bpf/prog_tests/mptcp.c  | 23 ++++++++
> >  tools/testing/selftests/bpf/progs/mptcp_bpf.h | 35 ++++++++++++
> >  .../selftests/bpf/progs/mptcp_subflow.c       | 55
> > +++++++++++++++++++
> >  3 files changed, 113 insertions(+)
> > 
> > diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c
> > b/tools/testing/selftests/bpf/prog_tests/mptcp.c
> > index d4c5209fbfaf..143ed1c756e1 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
> > @@ -371,6 +371,10 @@ static int endpoint_init(char *flags)
> >  static void run_subflow(void)
> >  {
> >  	int server_fd, client_fd;
> > +	char cc[TCP_CA_NAME_MAX];
> > +	unsigned int mark;
> > +	socklen_t len;
> > +	int err;
> >  
> >  	server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1,
> > 0);
> >  	if (!ASSERT_GE(server_fd, 0, "start_mptcp_server"))
> > @@ -382,6 +386,18 @@ static void run_subflow(void)
> >  
> >  	send_byte(client_fd);
> >  
> > +	sleep(0.1);
> 
> Why do you need a sleep here? To make sure the extra subflow has been
> created? It sounds safer to wait for an event here, no? Maybe simply
> with:
> 
>   #ifndef SOL_MPTCP
>   #define SOL_MPTCP 284
>   #endif
> 
>   #ifndef MPTCP_INFO
>   #define MPTCP_INFO 1
>   #endif
> 
>   (...)
> 
>   u8 subflows;
>   int i;
> 
>   (...)
> 
>   len = sizeof(subflows);
> 
>   /* Wait max 1 sec for new subflows to be created */
>   for (i = 0; i < 10; i++) {
>       err = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &subflows, &len);
> 
>       if (!err && subflows > 0)
>           break;
> 
>       sleep(0.1);
>   }
> 
> WDYT? It might make sense to add this in a new helper:
> 
>   wait_for_new_subflows(client_fd);
> 
> 
> Also, just to be sure, when does the BPF program set the socket
> options
> per subflow? When the subflows are created? Just to be sure it is
> always
> done before this call to 'getsockopt()', and only once.
> 
> > +
> > +	len = sizeof(mark);
> > +	err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark,
> > &len);
> > +	if (!ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)"))
> 
> Should we also check the value of 'mark' here?
> 
> > +		goto close_client;
> 
> Maybe it is not needed to stop earlier here, we can also check the
> value
> of the TCP CC and have info about the two checks? But maybe we need
> to
> return an error if this getsockopt() fails, but not the next one.
> 
>   err = getsockopt(TCP_CONGESTION) || err;
> 
> > +	len = sizeof(cc);
> > +	err = getsockopt(client_fd, SOL_TCP, TCP_CONGESTION, cc,
> > &len);
> > +	ASSERT_OK(err, "getsockopt(client_fd, TCP_CONGESTION)");
> 
> Here as well, we could check that 'cc' is the same as 'skel->data-
> >cc',
> because MPTCP will return the value from the first subflow.
> 
> Or, we change the BPF test to set a different TCP CC only on the
> second
> subflow, then we compare this getsockopt() with the default value
> from
> there:
> 
>   /proc/sys/net/ipv4/tcp_congestion_control
> 
> The validation in BPF will then check the value of the second subflow
> and it should be 'reno'.
> 
> 
> > +
> > +close_client:
> >  	close(client_fd);
> >  close_server:
> >  	close(server_fd);
> > @@ -392,6 +408,7 @@ static void test_subflow(void)
> >  	int cgroup_fd, prog_fd, err;
> >  	struct mptcp_subflow *skel;
> >  	struct nstoken *nstoken;
> > +	struct bpf_link *link;
> >  
> >  	cgroup_fd = test__join_cgroup("/mptcp_subflow");
> >  	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup:
> > mptcp_subflow"))
> > @@ -417,6 +434,11 @@ static void test_subflow(void)
> >  	if (endpoint_init("subflow") < 0)
> >  		goto close_netns;
> >  
> > +	link = bpf_program__attach_cgroup(skel-
> > >progs._getsockopt_subflow,
> > +					  cgroup_fd);
> > +	if (!ASSERT_OK_PTR(link, "getsockopt prog"))
> > +		goto close_netns;
> > +
> >  	run_subflow();
> >  
> >  close_netns:
> > @@ -425,6 +447,7 @@ static void test_subflow(void)
> >  	mptcp_subflow__destroy(skel);
> >  close_cgroup:
> >  	close(cgroup_fd);
> > +	bpf_link__destroy(link);
> >  }
> >  
> >  static struct nstoken *sched_init(char *flags, char *sched)
> > diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > index 782f36ed027e..70d9f5888976 100644
> > --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > @@ -4,9 +4,44 @@
> >  
> >  #include <vmlinux.h>
> >  #include <bpf/bpf_core_read.h>
> > +#include "bpf_experimental.h"
> >  
> >  #define MPTCP_SUBFLOWS_MAX 8
> >  
> > +static inline int list_is_head(const struct list_head *list,
> > +			       const struct list_head *head)
> > +{
> > +	return list == head;
> > +}
> > +
> > +#define list_entry(ptr, type,
> > member)					\
> > +	container_of(ptr, type, member)
> > +
> > +#define list_first_entry(ptr, type,
> > member)				\
> > +	list_entry((ptr)->next, type, member)
> > +
> > +#define list_next_entry(pos,
> > member)					\
> > +	list_entry((pos)->member.next, typeof(*(pos)), member)
> > +
> > +#define list_entry_is_head(pos, head,
> > member)				\
> > +	list_is_head(&pos->member, (head))
> > +
> > +#define list_for_each_entry(pos, head,
> > member)				\
> > +	for (pos = list_first_entry(head, typeof(*pos),
> > member);	\
> > +	     !list_entry_is_head(pos, head,
> > member);			\
> 
> I guess you cannot add 'cond_break' in the condition, because that's
> the
> equivalent of 'break', right?

Hi Matt,

This confuses me.

I used this in v2:

	mptcp_for_each_subflow(msk, subflow) {
		... ...

		cond_break;
	}

And you suggested to add "cond_break" into list_for_each_entry().

Do you mean that I should go back to v2?

Regards,
-Geliang

> 
> Also, just to be sure: can this 'cond_break' be used on all
> architectures? If not, maybe best to have a "list_for_each_entry"
> version that is specific to MPTCP subflows, with a "i <
> MPTCP_SUBFLOWS_MAX".
> 
> > +	     cond_break, pos = list_next_entry(pos, member))
> > +
> > +#define list_for_each_entry_safe(pos, n, head,
> > member)			\
> > +	for (pos = list_first_entry(head, typeof(*pos),
> > member),	\
> > +		n = list_next_entry(pos,
> > member);			\
> > +	     !list_entry_is_head(pos, head,
> > member);			\
> 
> Same here for 'cond_break'.
> 
> > +	     cond_break, pos = n, n = list_next_entry(n, member))
> > +
> > +#define mptcp_for_each_subflow(__msk,
> > __subflow)			\
> > +	list_for_each_entry(__subflow, &((__msk)->conn_list),
> > node)
> > +#define mptcp_for_each_subflow_safe(__msk, __subflow,
> > __tmp)		\
> > +	list_for_each_entry_safe(__subflow, __tmp, &((__msk)-
> > >conn_list), node)
> > +
> >  extern void mptcp_subflow_set_scheduled(struct
> > mptcp_subflow_context *subflow,
> >  					bool scheduled) __ksym;
> >  
> > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > index bc572e1d6df8..e8cc157278d2 100644
> > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > @@ -4,6 +4,7 @@
> >  
> >  /* vmlinux.h, bpf_helpers.h and other 'define' */
> >  #include "bpf_tracing_net.h"
> > +#include "mptcp_bpf.h"
> >  
> >  char _license[] SEC("license") = "GPL";
> >  
> > @@ -57,3 +58,57 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
> >  
> >  	return 1;
> >  }
> > +
> > +static int _check_getsockopt_subflows_mark(struct mptcp_sock *msk,
> > struct bpf_sockopt *ctx)
> > +{
> > +	struct mptcp_subflow_context *subflow;
> > +	int i = 0;
> > +
> > +	mptcp_for_each_subflow(msk, subflow) {
> > +		struct sock *ssk;
> > +
> > +		ssk =
> > mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> > +							   struct
> > mptcp_subflow_context));
> > +
> > +		if (ssk->sk_mark != ++i)
> > +			ctx->retval = -1;
> > +	}
> 
> I think you should check here that 'i == 2', otherwise we are not
> checking all the expected subflows.
> 
> > +
> > +	return 1;
> > +}
> > +
> > +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk,
> > struct bpf_sockopt *ctx)
> > +{
> > +	struct mptcp_subflow_context *subflow, *tmp;
> > +
> > +	mptcp_for_each_subflow_safe(msk, subflow, tmp) {
> > +		struct inet_connection_sock *icsk;
> > +		struct sock *ssk;
> > +
> > +		ssk =
> > mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> > +							   struct
> > mptcp_subflow_context));
> > +		icsk = bpf_core_cast(ssk, struct
> > inet_connection_sock);
> > +
> > +		if (ssk->sk_mark == 1 &&
> > +		    __builtin_memcmp(icsk->icsk_ca_ops->name, cc,
> > TCP_CA_NAME_MAX))
> > +			ctx->retval = -1;
> > +	}
> 
> Same here, you should check the number of subflows that have been
> looked
> at (2), otherwise the check might succeed, while not doing any or
> half
> of the validation.
> 
> I think here it is better if the CC is modified on the second
> subflow,
> so we iterate at least 2 times, and expect to find the modified CC.
> WDYT?
> 
> > +
> > +	return 1;
> > +}
> > +
> > +SEC("cgroup/getsockopt")
> > +int _getsockopt_subflow(struct bpf_sockopt *ctx)
> > +{
> > +	struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct
> > mptcp_sock);
> > +
> > +	if (!msk || !msk->token)
> 
> Instead of checking the token, should you not check that
> 
>   sk->sk_protocol == IPPROTO_MPTCP
> 
> no?
> 
> Just checking if there is something written at "*msk + token offset"
> looks a bit random to me, and not safe.
> 
> > +		return 1;
> > +
> > +	if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK)
> > +		return _check_getsockopt_subflows_mark(msk, ctx);
> > +	if (ctx->level == SOL_TCP && ctx->optname ==
> > TCP_CONGESTION)
> > +		return _check_getsockopt_subflow_cc(msk, ctx);
> > +
> > +	return 1;
> > +}
> 
> Cheers,
> Matt

Re: [PATCH mptcp-next v3 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow
Posted by Matthieu Baerts 3 months ago
On 22/08/2024 15:31, Geliang Tang wrote:
> On Thu, 2024-08-22 at 12:56 +0200, Matthieu Baerts wrote:
>> On 22/08/2024 11:22, Geliang Tang wrote:
>>> From: Geliang Tang <tanggeliang@kylinos.cn>

(...)

>>> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>>> b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>>> index 782f36ed027e..70d9f5888976 100644
>>> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>>> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>>> @@ -4,9 +4,44 @@
>>>  
>>>  #include <vmlinux.h>
>>>  #include <bpf/bpf_core_read.h>
>>> +#include "bpf_experimental.h"
>>>  
>>>  #define MPTCP_SUBFLOWS_MAX 8
>>>  
>>> +static inline int list_is_head(const struct list_head *list,
>>> +			       const struct list_head *head)
>>> +{
>>> +	return list == head;
>>> +}
>>> +
>>> +#define list_entry(ptr, type,
>>> member)					\
>>> +	container_of(ptr, type, member)
>>> +
>>> +#define list_first_entry(ptr, type,
>>> member)				\
>>> +	list_entry((ptr)->next, type, member)
>>> +
>>> +#define list_next_entry(pos,
>>> member)					\
>>> +	list_entry((pos)->member.next, typeof(*(pos)), member)
>>> +
>>> +#define list_entry_is_head(pos, head,
>>> member)				\
>>> +	list_is_head(&pos->member, (head))
>>> +
>>> +#define list_for_each_entry(pos, head,
>>> member)				\
>>> +	for (pos = list_first_entry(head, typeof(*pos),
>>> member);	\
>>> +	     !list_entry_is_head(pos, head,
>>> member);			\
>>
>> I guess you cannot add 'cond_break' in the condition, because that's
>> the
>> equivalent of 'break', right?
> 
> Hi Matt,
> 
> This confuses me.
> 
> I used this in v2:
> 
> 	mptcp_for_each_subflow(msk, subflow) {
> 		... ...
> 
> 		cond_break;
> 	}
> 
> And you suggested to add "cond_break" into list_for_each_entry().
> 
> Do you mean that I should go back to v2?

No, sorry, I was just initially surprised to see 'cond_break' in the
last part of the 'for', not in second part: the condition to stop the
loop. But I then saw that 'cond_break' is a cryptic define, doing the
equivalent of a "break", not just a simple condition, it has be where
you added it.

I'm personally a bit annoy to see a copy of all these "define" for the
list here in mptcp_bpf.h, but maybe it is normal, and accepted by BPF
maintainers. Thanks to the 'cond_break' you added there, they look quite
generic, and not specific to MPTCP. They could go elsewhere, but maybe
nobody else needs to iterate a list in the BPF selftests. And they can
always be moved later on I suppose. So no need to change that in the v4.

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.

Re: [PATCH mptcp-next v3 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow
Posted by Geliang Tang 3 months ago
Hi Matt,

On Thu, 2024-08-22 at 16:48 +0200, Matthieu Baerts wrote:
> On 22/08/2024 15:31, Geliang Tang wrote:
> > On Thu, 2024-08-22 at 12:56 +0200, Matthieu Baerts wrote:
> > > On 22/08/2024 11:22, Geliang Tang wrote:
> > > > From: Geliang Tang <tanggeliang@kylinos.cn>
> 
> (...)
> 
> > > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > > > b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > > > index 782f36ed027e..70d9f5888976 100644
> > > > --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > > > +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > > > @@ -4,9 +4,44 @@
> > > >  
> > > >  #include <vmlinux.h>
> > > >  #include <bpf/bpf_core_read.h>
> > > > +#include "bpf_experimental.h"
> > > >  
> > > >  #define MPTCP_SUBFLOWS_MAX 8
> > > >  
> > > > +static inline int list_is_head(const struct list_head *list,
> > > > +			       const struct list_head *head)
> > > > +{
> > > > +	return list == head;
> > > > +}
> > > > +
> > > > +#define list_entry(ptr, type,
> > > > member)					\
> > > > +	container_of(ptr, type, member)
> > > > +
> > > > +#define list_first_entry(ptr, type,
> > > > member)				\
> > > > +	list_entry((ptr)->next, type, member)
> > > > +
> > > > +#define list_next_entry(pos,
> > > > member)					\
> > > > +	list_entry((pos)->member.next, typeof(*(pos)), member)
> > > > +
> > > > +#define list_entry_is_head(pos, head,
> > > > member)				\
> > > > +	list_is_head(&pos->member, (head))
> > > > +
> > > > +#define list_for_each_entry(pos, head,
> > > > member)				\
> > > > +	for (pos = list_first_entry(head, typeof(*pos),
> > > > member);	\
> > > > +	     !list_entry_is_head(pos, head,
> > > > member);			\
> > > 
> > > I guess you cannot add 'cond_break' in the condition, because
> > > that's
> > > the
> > > equivalent of 'break', right?
> > 
> > Hi Matt,
> > 
> > This confuses me.
> > 
> > I used this in v2:
> > 
> > 	mptcp_for_each_subflow(msk, subflow) {
> > 		... ...
> > 
> > 		cond_break;
> > 	}
> > 
> > And you suggested to add "cond_break" into list_for_each_entry().
> > 
> > Do you mean that I should go back to v2?
> 
> No, sorry, I was just initially surprised to see 'cond_break' in the
> last part of the 'for', not in second part: the condition to stop the

Yes, you're right. It's better to add 'cond_break' in second part of
'for', not the last one. Updated this in v4. And v4 addresses your
other comments in v3 too.

Thanks,
-Geliang

> loop. But I then saw that 'cond_break' is a cryptic define, doing the
> equivalent of a "break", not just a simple condition, it has be where
> you added it.
> 
> I'm personally a bit annoy to see a copy of all these "define" for
> the
> list here in mptcp_bpf.h, but maybe it is normal, and accepted by BPF
> maintainers. Thanks to the 'cond_break' you added there, they look
> quite
> generic, and not specific to MPTCP. They could go elsewhere, but
> maybe
> nobody else needs to iterate a list in the BPF selftests. And they
> can
> always be moved later on I suppose. So no need to change that in the
> v4.
> 
> Cheers,
> Matt