[PATCH v2 mptcp-next] selftests: mptcp: more stable join tests-cases.

Paolo Abeni posted 1 patch 2 years, 3 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/multipath-tcp/mptcp_net-next tags/patchew/b6f8d2e06bfa3b15cc7fa0ac3076cfafd7448b07.1639482622.git.pabeni@redhat.com
Maintainers: Shuah Khan <shuah@kernel.org>, Jakub Kicinski <kuba@kernel.org>, "David S. Miller" <davem@davemloft.net>, Mat Martineau <mathew.j.martineau@linux.intel.com>, Matthieu Baerts <matthieu.baerts@tessares.net>
There is a newer version of this series
.../testing/selftests/net/mptcp/mptcp_join.sh | 65 ++++++++++++++-----
1 file changed, 49 insertions(+), 16 deletions(-)
[PATCH v2 mptcp-next] selftests: mptcp: more stable join tests-cases.
Posted by Paolo Abeni 2 years, 3 months ago
MPTCP join self-tests are a bit fragile as they reply on
delays instead of events to catch-up with the expected
sockets states.

Replace the delay with state checking where possible and
reduce the number of sleeps in the most complex scenarios.

This will both reduce the tests run-time and will improve
stability.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v1 -> v2:
 - fix ipv6 addr handling
---
 .../testing/selftests/net/mptcp/mptcp_join.sh | 65 ++++++++++++++-----
 1 file changed, 49 insertions(+), 16 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 8f49bbd4a201..7014391d750e 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -238,6 +238,42 @@ is_v6()
 	[ -z "${1##*:*}" ]
 }
 
+# $1: ns, $2: port
+wait_local_port_listen()
+{
+	local listener_ns="${1}"
+	local port="${2}"
+
+	local port_hex i
+
+	port_hex="$(printf "%04X" "${port}")"
+	for i in $(seq 10); do
+		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
+			break
+		sleep 0.1
+	done
+}
+
+# $1: ns, $2: endpoint id
+wait_address_deleted()
+{
+	local ns="${1}"
+	local addr="${2}"
+	local cnt
+
+	# 'ss' will trim everything from the rightmost ':' on looking
+	# for a trailing port number. Append a colon to avoid ipv6
+	# address corruption
+	is_v6 ${addr} && addr="inet6:$addr:"
+
+	for i in $(seq 10); do
+		cnt=$(ss -HtON ${ns} state big src = ${addr} | wc -l)
+		[ "$cnt" = 0 ] || break;
+		sleep 0.1
+	done
+}
+
 do_transfer()
 {
 	listener_ns="$1"
@@ -307,7 +343,7 @@ do_transfer()
 	fi
 	spid=$!
 
-	sleep 1
+	wait_local_port_listen "${listener_ns}" "${port}"
 
 	if [ "$test_link_fail" -eq 0 ];then
 		timeout ${timeout_test} \
@@ -324,10 +360,13 @@ do_transfer()
 	fi
 	cpid=$!
 
+	# let the mptcp subflow be established in background before
+	# do endpoint manipulation
+	[ $addr_nr_ns1 = "0" -a $addr_nr_ns2 = "0" ] || sleep 1
+
 	if [ $addr_nr_ns1 -gt 0 ]; then
 		let add_nr_ns1=addr_nr_ns1
 		counter=2
-		sleep 1
 		while [ $add_nr_ns1 -gt 0 ]; do
 			local addr
 			if is_v6 "${connect_addr}"; then
@@ -339,7 +378,6 @@ do_transfer()
 			let counter+=1
 			let add_nr_ns1-=1
 		done
-		sleep 1
 	elif [ $addr_nr_ns1 -lt 0 ]; then
 		let rm_nr_ns1=-addr_nr_ns1
 		if [ $rm_nr_ns1 -lt 8 ]; then
@@ -347,22 +385,18 @@ do_transfer()
 			pos=1
 			dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
 			if [ ${#dump[@]} -gt 0 ]; then
-				sleep 1
-
 				while [ $counter -le $rm_nr_ns1 ]
 				do
 					id=${dump[$pos]}
 					ip netns exec ${listener_ns} ./pm_nl_ctl del $id
-					sleep 1
+					wait_address_deleted ${listener_ns} ${dump[-1]}
 					let counter+=1
 					let pos+=5
 				done
 			fi
 		elif [ $rm_nr_ns1 -eq 8 ]; then
-			sleep 1
 			ip netns exec ${listener_ns} ./pm_nl_ctl flush
 		elif [ $rm_nr_ns1 -eq 9 ]; then
-			sleep 1
 			ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr}
 		fi
 	fi
@@ -373,10 +407,13 @@ do_transfer()
 		addr_nr_ns2=${addr_nr_ns2:9}
 	fi
 
+	# if newly added endpoints must be deleted, give the background msk
+	# some time to created them
+	[ $addr_nr_ns1 -gt 0 -a $addr_nr_ns2 -lt 0 ] && sleep 1
+
 	if [ $addr_nr_ns2 -gt 0 ]; then
 		let add_nr_ns2=addr_nr_ns2
 		counter=3
-		sleep 1
 		while [ $add_nr_ns2 -gt 0 ]; do
 			local addr
 			if is_v6 "${connect_addr}"; then
@@ -388,7 +425,6 @@ do_transfer()
 			let counter+=1
 			let add_nr_ns2-=1
 		done
-		sleep 1
 	elif [ $addr_nr_ns2 -lt 0 ]; then
 		let rm_nr_ns2=-addr_nr_ns2
 		if [ $rm_nr_ns2 -lt 8 ]; then
@@ -396,19 +432,17 @@ do_transfer()
 			pos=1
 			dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
 			if [ ${#dump[@]} -gt 0 ]; then
-				sleep 1
-
 				while [ $counter -le $rm_nr_ns2 ]
 				do
+					# rm_addr are serialized, allow the previous one to complete
 					id=${dump[$pos]}
 					ip netns exec ${connector_ns} ./pm_nl_ctl del $id
-					sleep 1
+					wait_address_deleted ${connector_ns} ${dump[-1]}
 					let counter+=1
 					let pos+=5
 				done
 			fi
 		elif [ $rm_nr_ns2 -eq 8 ]; then
-			sleep 1
 			ip netns exec ${connector_ns} ./pm_nl_ctl flush
 		elif [ $rm_nr_ns2 -eq 9 ]; then
 			local addr
@@ -417,7 +451,6 @@ do_transfer()
 			else
 				addr="10.0.1.2"
 			fi
-			sleep 1
 			ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr
 		fi
 	fi
@@ -1685,7 +1718,7 @@ add_addr_ports_tests()
 	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
 	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
 	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+	run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow
 	chk_join_nr "flush subflows and signal with port" 3 3 3
 	chk_add_nr 1 1
 	chk_rm_nr 2 2
-- 
2.33.1


Re: [PATCH v2 mptcp-next] selftests: mptcp: more stable join tests-cases.
Posted by Mat Martineau 2 years, 3 months ago
On Tue, 14 Dec 2021, Paolo Abeni wrote:

> MPTCP join self-tests are a bit fragile as they reply on
> delays instead of events to catch-up with the expected
> sockets states.
>
> Replace the delay with state checking where possible and
> reduce the number of sleeps in the most complex scenarios.
>
> This will both reduce the tests run-time and will improve
> stability.

Running this in my local VM I saw more failures with the patch than 
without (this is a kernel with a lot of debug turned on):

27 remove single subflow                syn[ ok ] - synack[ ok ] - ack[ ok ]
                                         rm [fail] got 0 RM_ADDR[s] expected 1 - sf    [ ok ]

...

37 flush invalid addresses              syn[ ok ] - synack[ ok ] - ack[ ok ]
                                         add[ ok ] - echo  [ ok ]
                                         rm [ ok ] - sf    [ ok ]
copyfd_io_poll: poll timed out (events: POLLIN 1, POLLOUT 0)
  client exit code 2, server 0


Could have been an unlucky run on my machine :)

Matthieu, does it improve behavior in your test environments?


- Mat


>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> v1 -> v2:
> - fix ipv6 addr handling
> ---
> .../testing/selftests/net/mptcp/mptcp_join.sh | 65 ++++++++++++++-----
> 1 file changed, 49 insertions(+), 16 deletions(-)
>
> diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
> index 8f49bbd4a201..7014391d750e 100755
> --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
> +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
> @@ -238,6 +238,42 @@ is_v6()
> 	[ -z "${1##*:*}" ]
> }
>
> +# $1: ns, $2: port
> +wait_local_port_listen()
> +{
> +	local listener_ns="${1}"
> +	local port="${2}"
> +
> +	local port_hex i
> +
> +	port_hex="$(printf "%04X" "${port}")"
> +	for i in $(seq 10); do
> +		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
> +			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
> +			break
> +		sleep 0.1
> +	done
> +}
> +
> +# $1: ns, $2: endpoint id
> +wait_address_deleted()
> +{
> +	local ns="${1}"
> +	local addr="${2}"
> +	local cnt
> +
> +	# 'ss' will trim everything from the rightmost ':' on looking
> +	# for a trailing port number. Append a colon to avoid ipv6
> +	# address corruption
> +	is_v6 ${addr} && addr="inet6:$addr:"
> +
> +	for i in $(seq 10); do
> +		cnt=$(ss -HtON ${ns} state big src = ${addr} | wc -l)
> +		[ "$cnt" = 0 ] || break;
> +		sleep 0.1
> +	done
> +}
> +
> do_transfer()
> {
> 	listener_ns="$1"
> @@ -307,7 +343,7 @@ do_transfer()
> 	fi
> 	spid=$!
>
> -	sleep 1
> +	wait_local_port_listen "${listener_ns}" "${port}"
>
> 	if [ "$test_link_fail" -eq 0 ];then
> 		timeout ${timeout_test} \
> @@ -324,10 +360,13 @@ do_transfer()
> 	fi
> 	cpid=$!
>
> +	# let the mptcp subflow be established in background before
> +	# do endpoint manipulation
> +	[ $addr_nr_ns1 = "0" -a $addr_nr_ns2 = "0" ] || sleep 1
> +
> 	if [ $addr_nr_ns1 -gt 0 ]; then
> 		let add_nr_ns1=addr_nr_ns1
> 		counter=2
> -		sleep 1
> 		while [ $add_nr_ns1 -gt 0 ]; do
> 			local addr
> 			if is_v6 "${connect_addr}"; then
> @@ -339,7 +378,6 @@ do_transfer()
> 			let counter+=1
> 			let add_nr_ns1-=1
> 		done
> -		sleep 1
> 	elif [ $addr_nr_ns1 -lt 0 ]; then
> 		let rm_nr_ns1=-addr_nr_ns1
> 		if [ $rm_nr_ns1 -lt 8 ]; then
> @@ -347,22 +385,18 @@ do_transfer()
> 			pos=1
> 			dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
> 			if [ ${#dump[@]} -gt 0 ]; then
> -				sleep 1
> -
> 				while [ $counter -le $rm_nr_ns1 ]
> 				do
> 					id=${dump[$pos]}
> 					ip netns exec ${listener_ns} ./pm_nl_ctl del $id
> -					sleep 1
> +					wait_address_deleted ${listener_ns} ${dump[-1]}
> 					let counter+=1
> 					let pos+=5
> 				done
> 			fi
> 		elif [ $rm_nr_ns1 -eq 8 ]; then
> -			sleep 1
> 			ip netns exec ${listener_ns} ./pm_nl_ctl flush
> 		elif [ $rm_nr_ns1 -eq 9 ]; then
> -			sleep 1
> 			ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr}
> 		fi
> 	fi
> @@ -373,10 +407,13 @@ do_transfer()
> 		addr_nr_ns2=${addr_nr_ns2:9}
> 	fi
>
> +	# if newly added endpoints must be deleted, give the background msk
> +	# some time to created them
> +	[ $addr_nr_ns1 -gt 0 -a $addr_nr_ns2 -lt 0 ] && sleep 1
> +
> 	if [ $addr_nr_ns2 -gt 0 ]; then
> 		let add_nr_ns2=addr_nr_ns2
> 		counter=3
> -		sleep 1
> 		while [ $add_nr_ns2 -gt 0 ]; do
> 			local addr
> 			if is_v6 "${connect_addr}"; then
> @@ -388,7 +425,6 @@ do_transfer()
> 			let counter+=1
> 			let add_nr_ns2-=1
> 		done
> -		sleep 1
> 	elif [ $addr_nr_ns2 -lt 0 ]; then
> 		let rm_nr_ns2=-addr_nr_ns2
> 		if [ $rm_nr_ns2 -lt 8 ]; then
> @@ -396,19 +432,17 @@ do_transfer()
> 			pos=1
> 			dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
> 			if [ ${#dump[@]} -gt 0 ]; then
> -				sleep 1
> -
> 				while [ $counter -le $rm_nr_ns2 ]
> 				do
> +					# rm_addr are serialized, allow the previous one to complete
> 					id=${dump[$pos]}
> 					ip netns exec ${connector_ns} ./pm_nl_ctl del $id
> -					sleep 1
> +					wait_address_deleted ${connector_ns} ${dump[-1]}
> 					let counter+=1
> 					let pos+=5
> 				done
> 			fi
> 		elif [ $rm_nr_ns2 -eq 8 ]; then
> -			sleep 1
> 			ip netns exec ${connector_ns} ./pm_nl_ctl flush
> 		elif [ $rm_nr_ns2 -eq 9 ]; then
> 			local addr
> @@ -417,7 +451,6 @@ do_transfer()
> 			else
> 				addr="10.0.1.2"
> 			fi
> -			sleep 1
> 			ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr
> 		fi
> 	fi
> @@ -1685,7 +1718,7 @@ add_addr_ports_tests()
> 	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
> 	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
> 	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
> -	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
> +	run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow
> 	chk_join_nr "flush subflows and signal with port" 3 3 3
> 	chk_add_nr 1 1
> 	chk_rm_nr 2 2
> -- 
> 2.33.1
>
>
>

--
Mat Martineau
Intel

Re: [PATCH v2 mptcp-next] selftests: mptcp: more stable join tests-cases.
Posted by Matthieu Baerts 2 years, 3 months ago
Hi Paolo, Mat,

On 15/12/2021 02:32, Mat Martineau wrote:
> On Tue, 14 Dec 2021, Paolo Abeni wrote:
> 
>> MPTCP join self-tests are a bit fragile as they reply on
>> delays instead of events to catch-up with the expected
>> sockets states.
>>
>> Replace the delay with state checking where possible and
>> reduce the number of sleeps in the most complex scenarios.
>>
>> This will both reduce the tests run-time and will improve
>> stability.

Thank you for looking at that :)

> Running this in my local VM I saw more failures with the patch than
> without (this is a kernel with a lot of debug turned on):
> 
> 27 remove single subflow                syn[ ok ] - synack[ ok ] - ack[
> ok ]
>                                         rm [fail] got 0 RM_ADDR[s]
> expected 1 - sf    [ ok ]
> 
> ...
> 
> 37 flush invalid addresses              syn[ ok ] - synack[ ok ] - ack[
> ok ]
>                                         add[ ok ] - echo  [ ok ]
>                                         rm [ ok ] - sf    [ ok ]
> copyfd_io_poll: poll timed out (events: POLLIN 1, POLLOUT 0)
>  client exit code 2, server 0
> 
> 
> Could have been an unlucky run on my machine :)
> 
> Matthieu, does it improve behavior in your test environments?

I ran mptcp_join.sh test in a loop during the night.
After 56 iterations, I got 56 failures, each time one test was failing:


# 30 remove invalid addresses     syn[ ok ] - synack[ ok ] - ack[ ok ]

#                                 add[ ok ] - echo  [ ok ]

#                                 rm [fail] got 2 RM_ADDR[s] expected 3

#  - sf    [ ok ]


I didn't have this error with the RFC version shared on IRC.


Please note that one other test failed once in 56 iterations:

# 38 add signal address   syn[ ok ] - synack[ ok ] - ack[ ok ]

#                         add[ ok ] - echo  [fail] got 0 ADD_ADDR
echo[s] expected 1


I guess we can ignore this one for now.


But it seems to be the way to go! What I would suggest is to allow
waiting longer and return an error if we didn't find anything.

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net

Re: [PATCH v2 mptcp-next] selftests: mptcp: more stable join tests-cases.
Posted by Matthieu Baerts 2 years, 3 months ago
On 15/12/2021 10:04, Matthieu Baerts wrote:
> Hi Paolo, Mat,
> 
> On 15/12/2021 02:32, Mat Martineau wrote:
>> On Tue, 14 Dec 2021, Paolo Abeni wrote:
>>
>>> MPTCP join self-tests are a bit fragile as they reply on
>>> delays instead of events to catch-up with the expected
>>> sockets states.
>>>
>>> Replace the delay with state checking where possible and
>>> reduce the number of sleeps in the most complex scenarios.
>>>
>>> This will both reduce the tests run-time and will improve
>>> stability.
> 
> Thank you for looking at that :)
> 
>> Running this in my local VM I saw more failures with the patch than
>> without (this is a kernel with a lot of debug turned on):
>>
>> 27 remove single subflow                syn[ ok ] - synack[ ok ] - ack[
>> ok ]
>>                                         rm [fail] got 0 RM_ADDR[s]
>> expected 1 - sf    [ ok ]
>>
>> ...
>>
>> 37 flush invalid addresses              syn[ ok ] - synack[ ok ] - ack[
>> ok ]
>>                                         add[ ok ] - echo  [ ok ]
>>                                         rm [ ok ] - sf    [ ok ]
>> copyfd_io_poll: poll timed out (events: POLLIN 1, POLLOUT 0)
>>  client exit code 2, server 0
>>
>>
>> Could have been an unlucky run on my machine :)
>>
>> Matthieu, does it improve behavior in your test environments?
> 
> I ran mptcp_join.sh test in a loop during the night.

I forgot to mention the public CI also had some issues:


- KVM Validation: normal:
  - Unstable: 2 failed test(s): packetdrill_add_addr selftest_mptcp_join 🔴:
  - Task: https://cirrus-ci.com/task/5809917423517696
  - Summary:
https://api.cirrus-ci.com/v1/artifact/task/5809917423517696/summary/summary.txt

- KVM Validation: debug:
  - Unstable: 3 failed test(s): selftest_diag selftest_mptcp_connect
selftest_mptcp_join 🔴:
  - Task: https://cirrus-ci.com/task/5246967470096384
  - Summary:
https://api.cirrus-ci.com/v1/artifact/task/5246967470096384/summary/summary.txt

Initiator: Patchew Applier
Commits:
https://github.com/multipath-tcp/mptcp_net-next/commits/b3b8dde4c773

But it also looks like the CI was quite busy with many other tasks when
looking at results from other tests at the same time.

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net

Re: [PATCH v2 mptcp-next] selftests: mptcp: more stable join tests-cases.
Posted by Matthieu Baerts 2 years, 3 months ago
Hi Paolo,

On 14/12/2021 12:51, Paolo Abeni wrote:
> MPTCP join self-tests are a bit fragile as they reply on
> delays instead of events to catch-up with the expected
> sockets states.
> 
> Replace the delay with state checking where possible and
> reduce the number of sleeps in the most complex scenarios.
> 
> This will both reduce the tests run-time and will improve
> stability.

Again, thank you for looking at that!

> 
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> v1 -> v2:
>  - fix ipv6 addr handling
> ---
>  .../testing/selftests/net/mptcp/mptcp_join.sh | 65 ++++++++++++++-----
>  1 file changed, 49 insertions(+), 16 deletions(-)
> 
> diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
> index 8f49bbd4a201..7014391d750e 100755
> --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
> +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
> @@ -238,6 +238,42 @@ is_v6()
>  	[ -z "${1##*:*}" ]
>  }
>  
> +# $1: ns, $2: port
> +wait_local_port_listen()
> +{
> +	local listener_ns="${1}"
> +	local port="${2}"
> +
> +	local port_hex i
> +
> +	port_hex="$(printf "%04X" "${port}")"
> +	for i in $(seq 10); do

I think it is safe to increase the number of iterations. Most of the
time, we will never reach the maximum.

> +		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
> +			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
> +			break
> +		sleep 0.1
> +	done

Probably here, we should exit with an error, no?

> +}
> +
> +# $1: ns, $2: endpoint id
> +wait_address_deleted()
> +{
> +	local ns="${1}"
> +	local addr="${2}"
> +	local cnt

May you declare "i" as local too please?

> +
> +	# 'ss' will trim everything from the rightmost ':' on looking
> +	# for a trailing port number. Append a colon to avoid ipv6
> +	# address corruption
> +	is_v6 ${addr} && addr="inet6:$addr:"
> +
> +	for i in $(seq 10); do

Same here, maybe OK to increase it to 20 or much more and exit with an
error if the address has not been deleted on time.

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net

Re: [PATCH v2 mptcp-next] selftests: mptcp: more stable join tests-cases.
Posted by Paolo Abeni 2 years, 3 months ago
On Wed, 2021-12-15 at 10:11 +0100, Matthieu Baerts wrote:
> Hi Paolo,
> 
> On 14/12/2021 12:51, Paolo Abeni wrote:
> > MPTCP join self-tests are a bit fragile as they reply on
> > delays instead of events to catch-up with the expected
> > sockets states.
> > 
> > Replace the delay with state checking where possible and
> > reduce the number of sleeps in the most complex scenarios.
> > 
> > This will both reduce the tests run-time and will improve
> > stability.
> 
> Again, thank you for looking at that!
> 
> > 
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> > v1 -> v2:
> >  - fix ipv6 addr handling
> > ---
> >  .../testing/selftests/net/mptcp/mptcp_join.sh | 65 ++++++++++++++-----
> >  1 file changed, 49 insertions(+), 16 deletions(-)
> > 
> > diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
> > index 8f49bbd4a201..7014391d750e 100755
> > --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
> > +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
> > @@ -238,6 +238,42 @@ is_v6()
> >  	[ -z "${1##*:*}" ]
> >  }
> >  
> > +# $1: ns, $2: port
> > +wait_local_port_listen()
> > +{
> > +	local listener_ns="${1}"
> > +	local port="${2}"
> > +
> > +	local port_hex i
> > +
> > +	port_hex="$(printf "%04X" "${port}")"
> > +	for i in $(seq 10); do
> 
> I think it is safe to increase the number of iterations. Most of the
> time, we will never reach the maximum.
> 
> > +		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
> > +			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
> > +			break
> > +		sleep 0.1
> > +	done
> 
> Probably here, we should exit with an error, no?

No, it's better to let the caller fail, so we get better/more
consistent error reporting.

This code is copied verbatim from mptcp_connect.sh, I think we don't
want to introduce differences.

> 
> > +}
> > +
> > +# $1: ns, $2: endpoint id
> > +wait_address_deleted()
> > +{
> > +	local ns="${1}"
> > +	local addr="${2}"
> > +	local cnt
> 
> May you declare "i" as local too please?

wait_address_deleted() is bugged and the source of the reported
failures. I'm rewriting it.

Thanks!

Paolo


Re: [PATCH v2 mptcp-next] selftests: mptcp: more stable join tests-cases.
Posted by Matthieu Baerts 2 years, 3 months ago
On 15/12/2021 11:23, Paolo Abeni wrote:
> On Wed, 2021-12-15 at 10:11 +0100, Matthieu Baerts wrote:
>> Hi Paolo,
>>
>> On 14/12/2021 12:51, Paolo Abeni wrote:
>>> MPTCP join self-tests are a bit fragile as they reply on
>>> delays instead of events to catch-up with the expected
>>> sockets states.
>>>
>>> Replace the delay with state checking where possible and
>>> reduce the number of sleeps in the most complex scenarios.
>>>
>>> This will both reduce the tests run-time and will improve
>>> stability.
>>
>> Again, thank you for looking at that!
>>
>>>
>>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>>> ---
>>> v1 -> v2:
>>>  - fix ipv6 addr handling
>>> ---
>>>  .../testing/selftests/net/mptcp/mptcp_join.sh | 65 ++++++++++++++-----
>>>  1 file changed, 49 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
>>> index 8f49bbd4a201..7014391d750e 100755
>>> --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
>>> +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
>>> @@ -238,6 +238,42 @@ is_v6()
>>>  	[ -z "${1##*:*}" ]
>>>  }
>>>  
>>> +# $1: ns, $2: port
>>> +wait_local_port_listen()
>>> +{
>>> +	local listener_ns="${1}"
>>> +	local port="${2}"
>>> +
>>> +	local port_hex i
>>> +
>>> +	port_hex="$(printf "%04X" "${port}")"
>>> +	for i in $(seq 10); do
>>
>> I think it is safe to increase the number of iterations. Most of the
>> time, we will never reach the maximum.
>>
>>> +		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
>>> +			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
>>> +			break
>>> +		sleep 0.1
>>> +	done
>>
>> Probably here, we should exit with an error, no?
> 
> No, it's better to let the caller fail, so we get better/more
> consistent error reporting.

Would it not be a useful hint to see an error like:

  Error: expected to find port ${port} in listen mode

> This code is copied verbatim from mptcp_connect.sh, I think we don't
> want to introduce differences.

Indeed but we can also modify the original one if needed :)

>>> +}
>>> +
>>> +# $1: ns, $2: endpoint id
>>> +wait_address_deleted()
>>> +{
>>> +	local ns="${1}"
>>> +	local addr="${2}"
>>> +	local cnt
>>
>> May you declare "i" as local too please?
> 
> wait_address_deleted() is bugged and the source of the reported
> failures. I'm rewriting it.

Thank you for looking at that!

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net