Added the test cases for MP_FAIL, use 'tc' command to trigger the
checksum failure.
Suggested-by: Davide Caratti <dcaratti@redhat.com>
Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
tools/testing/selftests/net/mptcp/config | 5 +
.../testing/selftests/net/mptcp/mptcp_join.sh | 111 ++++++++++++++++--
2 files changed, 107 insertions(+), 9 deletions(-)
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index d36b7da5082a..26955abe49f0 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -19,3 +19,8 @@ CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index e48ce23d2386..535baa3c01e7 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -16,6 +16,7 @@ mptcp_connect=""
capture=0
checksum=0
do_all_tests=1
+nr_fail=0
TEST_COUNT=0
@@ -58,6 +59,8 @@ init()
fi
done
+ validate_checksum=$checksum
+
# ns1 ns2
# ns1eth1 ns2eth1
# ns1eth2 ns2eth2
@@ -161,6 +164,45 @@ reset_with_allow_join_id0()
ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
}
+# Modify TCP payload without corrupting the TCP packet
+#
+# This rule inverts a 32-bit word at byte offset 148 for all TCP ACK
+# packets carrying enough data.
+# Once it is done, the TCP Checksum field is updated so the packet
+# is still considered as valid at the TCP level.
+# But because the MPTCP checksum, covering the TCP options and data,
+# has not been updated, we will detect the modification and emit an
+# MP_FAIL: what we want to validate here.
+#
+# Please note that this rule will produce this pr_info() message for
+# each TCP ACK packets not carrying enough data:
+#
+# tc action pedit offset 162 out of bounds
+#
+# But this should be limited to a very few numbers of packets as we
+# restrict this rule to outgoing TCP traffic with only the ACK flag
+# + except the 3rd ACK, only packets carrying data should be seen in
+# this direction.
+reset_with_fail()
+{
+ reset
+
+ ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1
+ ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1
+
+ validate_checksum=1
+ nr_fail=0
+ i="$1"
+
+ tc -n $ns2 qdisc add dev ns2eth$i clsact
+ tc -n $ns2 filter add dev ns2eth$i egress \
+ protocol ip prio 1000 \
+ flower ip_proto tcp tcp_flags 0x10/0xff \
+ action pedit munge offset 148 u32 invert \
+ pipe csum tcp \
+ index 100
+}
+
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
@@ -179,6 +221,12 @@ if [ $? -ne 0 ];then
exit $ksft_skip
fi
+jq -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run all tests without jq tool"
+ exit $ksft_skip
+fi
+
print_file_err()
{
ls -l "$1" 1>&2
@@ -233,6 +281,21 @@ link_failure()
done
}
+get_nr_fail()
+{
+ i="$1"
+
+ local action=$(tc -n $ns2 -j -s action show action pedit index 100)
+ local packets=$(echo $action | jq '.[1].actions[0].stats.packets')
+ local overlimits=$(echo $action | jq '.[1].actions[0].stats.overlimits')
+
+ let pkt=$packets-$overlimits
+ if [ $pkt -gt 0 ]; then
+ nr_fail=1
+ fi
+ tc -n $ns2 qdisc del dev ns2eth$i clsact
+}
+
# $1: IP address
is_v6()
{
@@ -589,6 +652,8 @@ dump_stats()
chk_csum_nr()
{
local msg=${1:-""}
+ local csum_ns1=${2:-0}
+ local csum_ns2=${3:-0}
local count
local dump_stats
@@ -600,8 +665,8 @@ chk_csum_nr()
printf " %-36s %s" "$msg" "sum"
count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
[ -z "$count" ] && count=0
- if [ "$count" != 0 ]; then
- echo "[fail] got $count data checksum error[s] expected 0"
+ if [ "$count" != $csum_ns1 ]; then
+ echo "[fail] got $count data checksum error[s] expected $csum_ns1"
ret=1
dump_stats=1
else
@@ -610,8 +675,8 @@ chk_csum_nr()
echo -n " - csum "
count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
[ -z "$count" ] && count=0
- if [ "$count" != 0 ]; then
- echo "[fail] got $count data checksum error[s] expected 0"
+ if [ "$count" != $csum_ns2 ]; then
+ echo "[fail] got $count data checksum error[s] expected $csum_ns2"
ret=1
dump_stats=1
else
@@ -690,6 +755,8 @@ chk_join_nr()
local syn_nr=$2
local syn_ack_nr=$3
local ack_nr=$4
+ local fail_nr=${5:-0}
+ local infi_nr=${6:-0}
local count
local dump_stats
@@ -726,10 +793,10 @@ chk_join_nr()
echo "[ ok ]"
fi
[ "${dump_stats}" = 1 ] && dump_stats
- if [ $checksum -eq 1 ]; then
- chk_csum_nr
- chk_fail_nr 0 0
- chk_infi_nr 0 0
+ if [ $validate_checksum -eq 1 ]; then
+ chk_csum_nr "" $fail_nr
+ chk_fail_nr $fail_nr $fail_nr
+ chk_infi_nr $infi_nr $infi_nr
fi
}
@@ -1985,6 +2052,27 @@ userspace_tests()
chk_rm_nr 0 0
}
+fail_tests()
+{
+ # multiple subflows
+ reset_with_fail 2
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 dev ns2eth2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 2
+ get_nr_fail 2
+ chk_join_nr "$nr_fail MP_FAIL, multiple subflows" 2 2 2 $nr_fail
+
+ # single subflow
+ reset_with_fail 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ run_tests $ns1 $ns2 10.0.1.1 2
+ get_nr_fail 1
+ chk_join_nr "$nr_fail MP_FAIL, single subflow" 0 0 0 $nr_fail $nr_fail
+}
+
all_tests()
{
subflows_tests
@@ -2003,6 +2091,7 @@ all_tests()
deny_join_id0_tests
fullmesh_tests
userspace_tests
+ fail_tests
}
usage()
@@ -2024,6 +2113,7 @@ usage()
echo " -d deny_join_id0_tests"
echo " -m fullmesh_tests"
echo " -u userspace_tests"
+ echo " -F fail_tests"
echo " -c capture pcap files"
echo " -C enable data checksum"
echo " -h help"
@@ -2059,7 +2149,7 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
-while getopts 'fesltra64bpkdmuchCS' opt; do
+while getopts 'fesltra64bpkdmuchCSF' opt; do
case $opt in
f)
subflows_tests
@@ -2109,6 +2199,9 @@ while getopts 'fesltra64bpkdmuchCS' opt; do
u)
userspace_tests
;;
+ F)
+ fail_tests
+ ;;
c)
;;
C)
--
2.31.1
On Mon, 10 Jan 2022, Geliang Tang wrote:
> Added the test cases for MP_FAIL, use 'tc' command to trigger the
> checksum failure.
>
> Suggested-by: Davide Caratti <dcaratti@redhat.com>
> Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
> Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> tools/testing/selftests/net/mptcp/config | 5 +
> .../testing/selftests/net/mptcp/mptcp_join.sh | 111 ++++++++++++++++--
> 2 files changed, 107 insertions(+), 9 deletions(-)
>
Does anyone else see the "leaked reference" errors when running these fail
tests with the latest export branch? Looks like the reference tracker
finds a tc-related error:
[ 238.871372] leaked reference.
[ 238.872393] fl_change+0x2db/0x2520
[ 238.873148] tc_new_tfilter+0x6c4/0x11f0
[ 238.873959] rtnetlink_rcv_msg+0x49f/0x640
[ 238.874798] netlink_rcv_skb+0xc4/0x1f0
[ 238.875570] netlink_unicast+0x2d5/0x410
[ 238.876364] netlink_sendmsg+0x3b3/0x6c0
[ 238.877155] sock_sendmsg+0x91/0xa0
[ 238.877890] ____sys_sendmsg+0x3ad/0x3f0
[ 238.878684] ___sys_sendmsg+0xdb/0x140
[ 238.879354] __sys_sendmsg+0xae/0x120
[ 238.879947] do_syscall_64+0x3b/0x90
[ 238.880683] entry_SYSCALL_64_after_hwframe+0x44/0xae
It doesn't look like MPTCP is to blame, but I'm curious if others see the
same.
Since this test till has the file mismatch error, I don't think it's ready
for the export branch yet. But I think fixing this will involve two
things:
1. (Likely) A fix to the code that flushes the received data from the
subflow rx buffer, so corrupt data is dropped before it gets copied to the
MPTCP-level buffer. Still trying to narrow this down, and it will likely
involve a separate patch (maybe to squash to an existing patch in the
export branch, maybe not).
2. A way to accept the expected bit flips in the "infinite fallback" case,
where the checksum failure causes fallback and the data is not rejected.
check_transfer just uses 'cmp' right now to make sure the temp files are
identical. Another tool (maybe awk, maybe a small c program in our test
directory) could compare bytes in each file allowing either identical
bytes or inverted bytes:
// assuming files already mmap'd
for (i = 0; i < length; i++) {
if ((file1[i] != file2[i]) && (file1[i] != ~file2[i]))
return 1;
}
return 0;
For the case where inverted bytes are valid, use the alternate comparison
tool.
- Mat
> diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
> index d36b7da5082a..26955abe49f0 100644
> --- a/tools/testing/selftests/net/mptcp/config
> +++ b/tools/testing/selftests/net/mptcp/config
> @@ -19,3 +19,8 @@ CONFIG_IP_ADVANCED_ROUTER=y
> CONFIG_IP_MULTIPLE_TABLES=y
> CONFIG_IP_NF_TARGET_REJECT=m
> CONFIG_IPV6_MULTIPLE_TABLES=y
> +CONFIG_NET_ACT_CSUM=m
> +CONFIG_NET_ACT_PEDIT=m
> +CONFIG_NET_CLS_ACT=y
> +CONFIG_NET_CLS_FLOWER=m
> +CONFIG_NET_SCH_INGRESS=m
> diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
> index e48ce23d2386..535baa3c01e7 100755
> --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
> +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
> @@ -16,6 +16,7 @@ mptcp_connect=""
> capture=0
> checksum=0
> do_all_tests=1
> +nr_fail=0
>
> TEST_COUNT=0
>
> @@ -58,6 +59,8 @@ init()
> fi
> done
>
> + validate_checksum=$checksum
> +
> # ns1 ns2
> # ns1eth1 ns2eth1
> # ns1eth2 ns2eth2
> @@ -161,6 +164,45 @@ reset_with_allow_join_id0()
> ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
> }
>
> +# Modify TCP payload without corrupting the TCP packet
> +#
> +# This rule inverts a 32-bit word at byte offset 148 for all TCP ACK
> +# packets carrying enough data.
> +# Once it is done, the TCP Checksum field is updated so the packet
> +# is still considered as valid at the TCP level.
> +# But because the MPTCP checksum, covering the TCP options and data,
> +# has not been updated, we will detect the modification and emit an
> +# MP_FAIL: what we want to validate here.
> +#
> +# Please note that this rule will produce this pr_info() message for
> +# each TCP ACK packets not carrying enough data:
> +#
> +# tc action pedit offset 162 out of bounds
> +#
> +# But this should be limited to a very few numbers of packets as we
> +# restrict this rule to outgoing TCP traffic with only the ACK flag
> +# + except the 3rd ACK, only packets carrying data should be seen in
> +# this direction.
> +reset_with_fail()
> +{
> + reset
> +
> + ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1
> + ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1
> +
> + validate_checksum=1
> + nr_fail=0
> + i="$1"
> +
> + tc -n $ns2 qdisc add dev ns2eth$i clsact
> + tc -n $ns2 filter add dev ns2eth$i egress \
> + protocol ip prio 1000 \
> + flower ip_proto tcp tcp_flags 0x10/0xff \
> + action pedit munge offset 148 u32 invert \
> + pipe csum tcp \
> + index 100
> +}
> +
> ip -Version > /dev/null 2>&1
> if [ $? -ne 0 ];then
> echo "SKIP: Could not run test without ip tool"
> @@ -179,6 +221,12 @@ if [ $? -ne 0 ];then
> exit $ksft_skip
> fi
>
> +jq -V > /dev/null 2>&1
> +if [ $? -ne 0 ];then
> + echo "SKIP: Could not run all tests without jq tool"
> + exit $ksft_skip
> +fi
> +
> print_file_err()
> {
> ls -l "$1" 1>&2
> @@ -233,6 +281,21 @@ link_failure()
> done
> }
>
> +get_nr_fail()
> +{
> + i="$1"
> +
> + local action=$(tc -n $ns2 -j -s action show action pedit index 100)
> + local packets=$(echo $action | jq '.[1].actions[0].stats.packets')
> + local overlimits=$(echo $action | jq '.[1].actions[0].stats.overlimits')
> +
> + let pkt=$packets-$overlimits
> + if [ $pkt -gt 0 ]; then
> + nr_fail=1
> + fi
> + tc -n $ns2 qdisc del dev ns2eth$i clsact
> +}
> +
> # $1: IP address
> is_v6()
> {
> @@ -589,6 +652,8 @@ dump_stats()
> chk_csum_nr()
> {
> local msg=${1:-""}
> + local csum_ns1=${2:-0}
> + local csum_ns2=${3:-0}
> local count
> local dump_stats
>
> @@ -600,8 +665,8 @@ chk_csum_nr()
> printf " %-36s %s" "$msg" "sum"
> count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
> [ -z "$count" ] && count=0
> - if [ "$count" != 0 ]; then
> - echo "[fail] got $count data checksum error[s] expected 0"
> + if [ "$count" != $csum_ns1 ]; then
> + echo "[fail] got $count data checksum error[s] expected $csum_ns1"
> ret=1
> dump_stats=1
> else
> @@ -610,8 +675,8 @@ chk_csum_nr()
> echo -n " - csum "
> count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
> [ -z "$count" ] && count=0
> - if [ "$count" != 0 ]; then
> - echo "[fail] got $count data checksum error[s] expected 0"
> + if [ "$count" != $csum_ns2 ]; then
> + echo "[fail] got $count data checksum error[s] expected $csum_ns2"
> ret=1
> dump_stats=1
> else
> @@ -690,6 +755,8 @@ chk_join_nr()
> local syn_nr=$2
> local syn_ack_nr=$3
> local ack_nr=$4
> + local fail_nr=${5:-0}
> + local infi_nr=${6:-0}
> local count
> local dump_stats
>
> @@ -726,10 +793,10 @@ chk_join_nr()
> echo "[ ok ]"
> fi
> [ "${dump_stats}" = 1 ] && dump_stats
> - if [ $checksum -eq 1 ]; then
> - chk_csum_nr
> - chk_fail_nr 0 0
> - chk_infi_nr 0 0
> + if [ $validate_checksum -eq 1 ]; then
> + chk_csum_nr "" $fail_nr
> + chk_fail_nr $fail_nr $fail_nr
> + chk_infi_nr $infi_nr $infi_nr
> fi
> }
>
> @@ -1985,6 +2052,27 @@ userspace_tests()
> chk_rm_nr 0 0
> }
>
> +fail_tests()
> +{
> + # multiple subflows
> + reset_with_fail 2
> + ip netns exec $ns1 ./pm_nl_ctl limits 0 2
> + ip netns exec $ns2 ./pm_nl_ctl limits 0 2
> + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
> + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 dev ns2eth2 flags subflow
> + run_tests $ns1 $ns2 10.0.1.1 2
> + get_nr_fail 2
> + chk_join_nr "$nr_fail MP_FAIL, multiple subflows" 2 2 2 $nr_fail
> +
> + # single subflow
> + reset_with_fail 1
> + ip netns exec $ns1 ./pm_nl_ctl limits 0 2
> + ip netns exec $ns2 ./pm_nl_ctl limits 0 2
> + run_tests $ns1 $ns2 10.0.1.1 2
> + get_nr_fail 1
> + chk_join_nr "$nr_fail MP_FAIL, single subflow" 0 0 0 $nr_fail $nr_fail
> +}
> +
> all_tests()
> {
> subflows_tests
> @@ -2003,6 +2091,7 @@ all_tests()
> deny_join_id0_tests
> fullmesh_tests
> userspace_tests
> + fail_tests
> }
>
> usage()
> @@ -2024,6 +2113,7 @@ usage()
> echo " -d deny_join_id0_tests"
> echo " -m fullmesh_tests"
> echo " -u userspace_tests"
> + echo " -F fail_tests"
> echo " -c capture pcap files"
> echo " -C enable data checksum"
> echo " -h help"
> @@ -2059,7 +2149,7 @@ if [ $do_all_tests -eq 1 ]; then
> exit $ret
> fi
>
> -while getopts 'fesltra64bpkdmuchCS' opt; do
> +while getopts 'fesltra64bpkdmuchCSF' opt; do
> case $opt in
> f)
> subflows_tests
> @@ -2109,6 +2199,9 @@ while getopts 'fesltra64bpkdmuchCS' opt; do
> u)
> userspace_tests
> ;;
> + F)
> + fail_tests
> + ;;
> c)
> ;;
> C)
> --
> 2.31.1
>
>
>
--
Mat Martineau
Intel
Hi Mat, On 13/01/2022 02:06, Mat Martineau wrote: > On Mon, 10 Jan 2022, Geliang Tang wrote: > >> Added the test cases for MP_FAIL, use 'tc' command to trigger the >> checksum failure. >> >> Suggested-by: Davide Caratti <dcaratti@redhat.com> >> Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net> >> Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net> >> Signed-off-by: Geliang Tang <geliang.tang@suse.com> >> --- >> tools/testing/selftests/net/mptcp/config | 5 + >> .../testing/selftests/net/mptcp/mptcp_join.sh | 111 ++++++++++++++++-- >> 2 files changed, 107 insertions(+), 9 deletions(-) >> > > Does anyone else see the "leaked reference" errors when running these > fail tests with the latest export branch? Looks like the reference > tracker finds a tc-related error: > > [ 238.871372] leaked reference. > [ 238.872393] fl_change+0x2db/0x2520 > [ 238.873148] tc_new_tfilter+0x6c4/0x11f0 > [ 238.873959] rtnetlink_rcv_msg+0x49f/0x640 > [ 238.874798] netlink_rcv_skb+0xc4/0x1f0 > [ 238.875570] netlink_unicast+0x2d5/0x410 > [ 238.876364] netlink_sendmsg+0x3b3/0x6c0 > [ 238.877155] sock_sendmsg+0x91/0xa0 > [ 238.877890] ____sys_sendmsg+0x3ad/0x3f0 > [ 238.878684] ___sys_sendmsg+0xdb/0x140 > [ 238.879354] __sys_sendmsg+0xae/0x120 > [ 238.879947] do_syscall_64+0x3b/0x90 > [ 238.880683] entry_SYSCALL_64_after_hwframe+0x44/0xae > > It doesn't look like MPTCP is to blame, but I'm curious if others see > the same. The public CI didn't report that: https://api.cirrus-ci.com/v1/task/5851145250799616/logs/test.log We can see a lot of TC warnings that still need to be fixed: tc action pedit offset 162 out of bounds I guess we need to change the test to send data mainly in one direction to avoid all these pure ACKs causing issues with the TC filter and probably causing the corresponding test to fail. For more details about the results from the CI: ===== 8< ===== Our CI did some validations and here is its report: - KVM Validation: normal: - Unstable: 1 failed test(s): selftest_mptcp_join 🔴: - Task: https://cirrus-ci.com/task/4725245343956992 - Summary: https://api.cirrus-ci.com/v1/artifact/task/4725245343956992/summary/summary.txt - KVM Validation: debug: - Unstable: 2 failed test(s): selftest_diag selftest_mptcp_join 🔴: - Task: https://cirrus-ci.com/task/5851145250799616 - Summary: https://api.cirrus-ci.com/v1/artifact/task/5851145250799616/summary/summary.txt Initiator: Patchew Applier Commits: https://github.com/multipath-tcp/mptcp_net-next/commits/274e87003fe5 ===== 8< ===== Cheers, Matt -- Tessares | Belgium | Hybrid Access Solutions www.tessares.net
On Wed, 12 Jan 2022, Mat Martineau wrote: > On Mon, 10 Jan 2022, Geliang Tang wrote: > >> Added the test cases for MP_FAIL, use 'tc' command to trigger the >> checksum failure. >> >> Suggested-by: Davide Caratti <dcaratti@redhat.com> >> Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net> >> Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net> >> Signed-off-by: Geliang Tang <geliang.tang@suse.com> >> --- >> tools/testing/selftests/net/mptcp/config | 5 + >> .../testing/selftests/net/mptcp/mptcp_join.sh | 111 ++++++++++++++++-- >> 2 files changed, 107 insertions(+), 9 deletions(-) >> > > Does anyone else see the "leaked reference" errors when running these fail > tests with the latest export branch? Looks like the reference tracker finds a > tc-related error: > > [ 238.871372] leaked reference. > [ 238.872393] fl_change+0x2db/0x2520 > [ 238.873148] tc_new_tfilter+0x6c4/0x11f0 > [ 238.873959] rtnetlink_rcv_msg+0x49f/0x640 > [ 238.874798] netlink_rcv_skb+0xc4/0x1f0 > [ 238.875570] netlink_unicast+0x2d5/0x410 > [ 238.876364] netlink_sendmsg+0x3b3/0x6c0 > [ 238.877155] sock_sendmsg+0x91/0xa0 > [ 238.877890] ____sys_sendmsg+0x3ad/0x3f0 > [ 238.878684] ___sys_sendmsg+0xdb/0x140 > [ 238.879354] __sys_sendmsg+0xae/0x120 > [ 238.879947] do_syscall_64+0x3b/0x90 > [ 238.880683] entry_SYSCALL_64_after_hwframe+0x44/0xae > > It doesn't look like MPTCP is to blame, but I'm curious if others see the > same. I've attached my kernel config as Davide requested in the meeting today. I had applied this version of the "Clarify when options can be used" series to export/20220111T054942, which is based on net-next/master at fe8152b38d3a (still the current HEAD). -- Mat Martineau Intel
hello, started looking at this, but the wild-guess is that the problem should be fixed with [1]: I'll do a quick test on my VM now. -- davide [1] https://lore.kernel.org/netdev/20220110094750.236478-1-eric.dumazet@gmail.com/ On Thu, Jan 13, 2022 at 6:09 PM Mat Martineau <mathew.j.martineau@linux.intel.com> wrote: > > On Wed, 12 Jan 2022, Mat Martineau wrote: > > > On Mon, 10 Jan 2022, Geliang Tang wrote: > > > >> Added the test cases for MP_FAIL, use 'tc' command to trigger the > >> checksum failure. > >> > >> Suggested-by: Davide Caratti <dcaratti@redhat.com> > >> Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net> > >> Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net> > >> Signed-off-by: Geliang Tang <geliang.tang@suse.com> > >> --- > >> tools/testing/selftests/net/mptcp/config | 5 + > >> .../testing/selftests/net/mptcp/mptcp_join.sh | 111 ++++++++++++++++-- > >> 2 files changed, 107 insertions(+), 9 deletions(-) > >> > > > > Does anyone else see the "leaked reference" errors when running these fail > > tests with the latest export branch? Looks like the reference tracker finds a > > tc-related error: > > > > [ 238.871372] leaked reference. > > [ 238.872393] fl_change+0x2db/0x2520 > > [ 238.873148] tc_new_tfilter+0x6c4/0x11f0 > > [ 238.873959] rtnetlink_rcv_msg+0x49f/0x640 > > [ 238.874798] netlink_rcv_skb+0xc4/0x1f0 > > [ 238.875570] netlink_unicast+0x2d5/0x410 > > [ 238.876364] netlink_sendmsg+0x3b3/0x6c0 > > [ 238.877155] sock_sendmsg+0x91/0xa0 > > [ 238.877890] ____sys_sendmsg+0x3ad/0x3f0 > > [ 238.878684] ___sys_sendmsg+0xdb/0x140 > > [ 238.879354] __sys_sendmsg+0xae/0x120 > > [ 238.879947] do_syscall_64+0x3b/0x90 > > [ 238.880683] entry_SYSCALL_64_after_hwframe+0x44/0xae > > > > It doesn't look like MPTCP is to blame, but I'm curious if others see the > > same. > > I've attached my kernel config as Davide requested in the meeting today. > > I had applied this version of the "Clarify when options can be used" > series to export/20220111T054942, which is based on net-next/master at > fe8152b38d3a (still the current HEAD). > > -- > Mat Martineau > Intel
Hi Davide,
On 18/01/2022 14:32, Davide Caratti wrote:
> hello,
>
> started looking at this, but the wild-guess is that the problem should
> be fixed with [1]: I'll do a quick test on my VM now.
>
> [1] https://lore.kernel.org/netdev/20220110094750.236478-1-eric.dumazet@gmail.com/
Good point, it might be linked to that.
This commit is not in net-next yet, hence not in our tree ("export"
branch). I can easily add it there if needed.
Cheers,
Matt
--
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
On Tue, Jan 18, 2022 at 3:15 PM Matthieu Baerts <matthieu.baerts@tessares.net> wrote: > > Hi Davide, > > On 18/01/2022 14:32, Davide Caratti wrote: > > hello, > > > > started looking at this, but the wild-guess is that the problem should > > be fixed with [1]: I'll do a quick test on my VM now. > > > > [1] https://lore.kernel.org/netdev/20220110094750.236478-1-eric.dumazet@gmail.com/ > > Good point, it might be linked to that. hello, confirmed, the problem happens when kernels are built with CONFIG_NET_NS_REFCNT_TRACKER=y and the problem is fixed by the above-mentioned patch from Eric. It can be easily verified also with tdc kselftest: # pushd tools/testing/selftests/tc-testing; ./tdc.py -f tc-tests/filters/tests.json ; popd thanks! -- davide
On Tue, 18 Jan 2022, Davide Caratti wrote: > On Tue, Jan 18, 2022 at 3:15 PM Matthieu Baerts > <matthieu.baerts@tessares.net> wrote: >> >> Hi Davide, >> >> On 18/01/2022 14:32, Davide Caratti wrote: >>> hello, >>> >>> started looking at this, but the wild-guess is that the problem should >>> be fixed with [1]: I'll do a quick test on my VM now. >>> >>> [1] https://lore.kernel.org/netdev/20220110094750.236478-1-eric.dumazet@gmail.com/ >> >> Good point, it might be linked to that. > > hello, > > confirmed, the problem happens when kernels are built with > CONFIG_NET_NS_REFCNT_TRACKER=y and the problem is fixed by the > above-mentioned patch from Eric. It can be easily verified also with > tdc kselftest: > > # pushd tools/testing/selftests/tc-testing; ./tdc.py -f > tc-tests/filters/tests.json ; popd > Thanks for tracking this down and confirming, Davide! -- Mat Martineau Intel
Hi Davide, On 18/01/2022 17:48, Davide Caratti wrote: > On Tue, Jan 18, 2022 at 3:15 PM Matthieu Baerts > <matthieu.baerts@tessares.net> wrote: >> >> Hi Davide, >> >> On 18/01/2022 14:32, Davide Caratti wrote: >>> hello, >>> >>> started looking at this, but the wild-guess is that the problem should >>> be fixed with [1]: I'll do a quick test on my VM now. >>> >>> [1] https://lore.kernel.org/netdev/20220110094750.236478-1-eric.dumazet@gmail.com/ >> >> Good point, it might be linked to that. > > hello, > > confirmed, the problem happens when kernels are built with > CONFIG_NET_NS_REFCNT_TRACKER=y and the problem is fixed by the > above-mentioned patch from Eric. Thank you for having checked! I just added this patch in our tree: - 943a4ed011fc: net: sched: do not allocate a tracker in tcf_exts_init() - Results: a99ed2058930..fbaefb5453f9 Builds and tests are now in progress: https://cirrus-ci.com/github/multipath-tcp/mptcp_net-next/export/20220119T102401 https://github.com/multipath-tcp/mptcp_net-next/actions/workflows/build-validation.yml?query=branch:export I guess I should better enable this kconfig option for the debug builds, no? Cheers, Matt -- Tessares | Belgium | Hybrid Access Solutions www.tessares.net
© 2016 - 2026 Red Hat, Inc.