[PATCH] selftest/mm: Make hugetlb_reparenting_test tolerant to async reparenting

Li Wang posted 1 patch 10 months ago
.../selftests/mm/hugetlb_reparenting_test.sh  | 96 ++++++++-----------
1 file changed, 41 insertions(+), 55 deletions(-)
[PATCH] selftest/mm: Make hugetlb_reparenting_test tolerant to async reparenting
Posted by Li Wang 10 months ago
In cgroup v2, memory and hugetlb usage reparenting is asynchronous.
This can cause test flakiness when immediately asserting usage after
deleting a child cgroup. To address this, add a helper function
`assert_with_retry()` that checks usage values with a timeout-based retry.
This improves test stability without relying on fixed sleep delays.

Also bump up the tolerance size to 7MB.

This avoids false-positive test failures such as:
  ...
  # Assert memory charged correctly for child only use.
  # actual a = 11 MB
  # expected a = 0 MB
  # fail
  # cleanup
  # [FAIL]
  not ok 11 hugetlb_reparenting_test.sh -cgroup-v2 # exit=1
  # 0
  # SUMMARY: PASS=10 SKIP=0 FAIL=1

Signed-off-by: Li Wang <liwang@redhat.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Shuah Khan <shuah@kernel.org>
---
 .../selftests/mm/hugetlb_reparenting_test.sh  | 96 ++++++++-----------
 1 file changed, 41 insertions(+), 55 deletions(-)

diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index 11f9bbe7dc22..1c172c6999f4 100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -36,7 +36,7 @@ else
     do_umount=1
   fi
 fi
-MNT='/mnt/huge/'
+MNT='/mnt/huge'
 
 function get_machine_hugepage_size() {
   hpz=$(grep -i hugepagesize /proc/meminfo)
@@ -60,6 +60,41 @@ function cleanup() {
   set -e
 }
 
+function assert_with_retry() {
+  local actual_path="$1"
+  local expected="$2"
+  local tolerance=$((7 * 1024 * 1024))
+  local timeout=20
+  local interval=1
+  local start_time
+  local now
+  local elapsed
+  local actual
+
+  start_time=$(date +%s)
+
+  while true; do
+    actual="$(cat "$actual_path")"
+
+    if [[ $actual -ge $(($expected - $tolerance)) ]] &&
+        [[ $actual -le $(($expected + $tolerance)) ]]; then
+      return 0
+    fi
+
+    now=$(date +%s)
+    elapsed=$((now - start_time))
+
+    if [[ $elapsed -ge $timeout ]]; then
+      echo "actual = $((${actual%% *} / 1024 / 1024)) MB"
+      echo "expected = $((${expected%% *} / 1024 / 1024)) MB"
+      cleanup
+      exit 1
+    fi
+
+    sleep $interval
+  done
+}
+
 function assert_state() {
   local expected_a="$1"
   local expected_a_hugetlb="$2"
@@ -70,58 +105,13 @@ function assert_state() {
     expected_b="$3"
     expected_b_hugetlb="$4"
   fi
-  local tolerance=$((5 * 1024 * 1024))
-
-  local actual_a
-  actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
-  if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
-    [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
-    echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
-    echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
-    echo fail
-
-    cleanup
-    exit 1
-  fi
-
-  local actual_a_hugetlb
-  actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
-  if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
-    [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
-    echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
-    echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
-    echo fail
-
-    cleanup
-    exit 1
-  fi
-
-  if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
-    return
-  fi
-
-  local actual_b
-  actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
-  if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
-    [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
-    echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
-    echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
-    echo fail
-
-    cleanup
-    exit 1
-  fi
 
-  local actual_b_hugetlb
-  actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
-  if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
-    [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
-    echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
-    echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
-    echo fail
+  assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"
+  assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb"
 
-    cleanup
-    exit 1
+  if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then
+    assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b"
+    assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"
   fi
 }
 
@@ -174,7 +164,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
 
 cleanup
 
-echo
 echo
 echo Test charge, rmdir, uncharge
 setup
@@ -195,7 +184,6 @@ cleanup
 
 echo done
 echo
-echo
 if [[ ! $cgroup2 ]]; then
   echo "Test parent and child hugetlb usage"
   setup
@@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then
   assert_state 0 $(($size * 2)) 0 $size
 
   rmdir "$CGROUP_ROOT"/a/b
-  sleep 5
   echo Assert memory reparent correctly.
   assert_state 0 $(($size * 2))
 
@@ -224,7 +211,6 @@ if [[ ! $cgroup2 ]]; then
   cleanup
 fi
 
-echo
 echo
 echo "Test child only hugetlb usage"
 echo setup
-- 
2.48.1
Re: [PATCH] selftest/mm: Make hugetlb_reparenting_test tolerant to async reparenting
Posted by Donet Tom 10 months ago
On 4/7/25 2:12 PM, Li Wang wrote:
> In cgroup v2, memory and hugetlb usage reparenting is asynchronous.
> This can cause test flakiness when immediately asserting usage after
> deleting a child cgroup. To address this, add a helper function
> `assert_with_retry()` that checks usage values with a timeout-based retry.
> This improves test stability without relying on fixed sleep delays.
>
> Also bump up the tolerance size to 7MB.
>
> To avoid False Positives:
>    ...
>    # Assert memory charged correctly for child only use.
>    # actual a = 11 MB
>    # expected a = 0 MB
>    # fail
>    # cleanup
>    # [FAIL]
>    not ok 11 hugetlb_reparenting_test.sh -cgroup-v2 # exit=1
>    # 0
>    # SUMMARY: PASS=10 SKIP=0 FAIL=1


I was also seeing this failure. I have tested this patch on my PowerPC
setup, and the test passes now.

./hugetlb_reparenting_test.sh -cgroup-v2
cleanup

Test charge, rmdir, uncharge
mkdir
write
Writing to this path: /mnt/huge/test
Writing this size: 52428800
Populating.
Not writing to memory.
Using method=0
Shared mapping.
RESERVE mapping.
Allocating using HUGETLBFS.

rmdir
uncharge
cleanup
done


Test child only hugetlb usage
setup
write
Writing to this path: /mnt/huge/test2
Writing this size: 52428800
Populating.
Not writing to memory.
Using method=0
Shared mapping.
RESERVE mapping.
Allocating using HUGETLBFS.

Assert memory charged correctly for child only use.
actual = 10 MB
expected = 0 MB
cleanup


Feel free to add:
Tested-by: Donet Tom <donettom@linux.ibm.com>


>
> Signed-off-by: Li Wang <liwang@redhat.com>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Anshuman Khandual <anshuman.khandual@arm.com>
> Cc: Dev Jain <dev.jain@arm.com>
> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Cc: Shuah Khan <shuah@kernel.org>
> ---
>   .../selftests/mm/hugetlb_reparenting_test.sh  | 96 ++++++++-----------
>   1 file changed, 41 insertions(+), 55 deletions(-)
>
> diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
> index 11f9bbe7dc22..1c172c6999f4 100755
> --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
> +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
> @@ -36,7 +36,7 @@ else
>       do_umount=1
>     fi
>   fi
> -MNT='/mnt/huge/'
> +MNT='/mnt/huge'
>   
>   function get_machine_hugepage_size() {
>     hpz=$(grep -i hugepagesize /proc/meminfo)
> @@ -60,6 +60,41 @@ function cleanup() {
>     set -e
>   }
>   
> +function assert_with_retry() {
> +  local actual_path="$1"
> +  local expected="$2"
> +  local tolerance=$((7 * 1024 * 1024))
> +  local timeout=20
> +  local interval=1
> +  local start_time
> +  local now
> +  local elapsed
> +  local actual
> +
> +  start_time=$(date +%s)
> +
> +  while true; do
> +    actual="$(cat "$actual_path")"
> +
> +    if [[ $actual -ge $(($expected - $tolerance)) ]] &&
> +        [[ $actual -le $(($expected + $tolerance)) ]]; then
> +      return 0
> +    fi
> +
> +    now=$(date +%s)
> +    elapsed=$((now - start_time))
> +
> +    if [[ $elapsed -ge $timeout ]]; then
> +      echo "actual = $((${actual%% *} / 1024 / 1024)) MB"
> +      echo "expected = $((${expected%% *} / 1024 / 1024)) MB"
> +      cleanup
> +      exit 1
> +    fi
> +
> +    sleep $interval
> +  done
> +}
> +
>   function assert_state() {
>     local expected_a="$1"
>     local expected_a_hugetlb="$2"
> @@ -70,58 +105,13 @@ function assert_state() {
>       expected_b="$3"
>       expected_b_hugetlb="$4"
>     fi
> -  local tolerance=$((5 * 1024 * 1024))
> -
> -  local actual_a
> -  actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
> -  if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
> -    [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
> -    echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
> -    echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
> -    echo fail
> -
> -    cleanup
> -    exit 1
> -  fi
> -
> -  local actual_a_hugetlb
> -  actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
> -  if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
> -    [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
> -    echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
> -    echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
> -    echo fail
> -
> -    cleanup
> -    exit 1
> -  fi
> -
> -  if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
> -    return
> -  fi
> -
> -  local actual_b
> -  actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
> -  if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
> -    [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
> -    echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
> -    echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
> -    echo fail
> -
> -    cleanup
> -    exit 1
> -  fi
>   
> -  local actual_b_hugetlb
> -  actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
> -  if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
> -    [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
> -    echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
> -    echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
> -    echo fail
> +  assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"
> +  assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb"
>   
> -    cleanup
> -    exit 1
> +  if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then
> +    assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b"
> +    assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"
>     fi
>   }
>   
> @@ -174,7 +164,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
>   
>   cleanup
>   
> -echo
>   echo
>   echo Test charge, rmdir, uncharge
>   setup
> @@ -195,7 +184,6 @@ cleanup
>   
>   echo done
>   echo
> -echo
>   if [[ ! $cgroup2 ]]; then
>     echo "Test parent and child hugetlb usage"
>     setup
> @@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then
>     assert_state 0 $(($size * 2)) 0 $size
>   
>     rmdir "$CGROUP_ROOT"/a/b
> -  sleep 5
>     echo Assert memory reparent correctly.
>     assert_state 0 $(($size * 2))
>   
> @@ -224,7 +211,6 @@ if [[ ! $cgroup2 ]]; then
>     cleanup
>   fi
>   
> -echo
>   echo
>   echo "Test child only hugetlb usage"
>   echo setup