[PATCH 10/10] selftets/damon/sysfs.py: pause DAMON before dumping status

SeongJae Park posted 10 patches 1 week, 6 days ago
There is a newer version of this series
[PATCH 10/10] selftets/damon/sysfs.py: pause DAMON before dumping status
Posted by SeongJae Park 1 week, 6 days ago
The sysfs.py test commits DAMON parameters, dumps the internal DAMON
state, and checks whether the parameters were committed as expected using
the dumped state.  While the dumping is ongoing, DAMON is still running.
It can make internal changes, including adding and removing regions.
This can race with the dump and produce false test results.  Pause
DAMON execution during the state dumping to avoid such races.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 tools/testing/selftests/damon/sysfs.py | 36 ++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py
index e6d34ba05893f..7a4cd915d5bc9 100755
--- a/tools/testing/selftests/damon/sysfs.py
+++ b/tools/testing/selftests/damon/sysfs.py
@@ -193,18 +193,53 @@ def assert_ctx_committed(ctx, dump):
     assert_true(dump['pause'] == ctx.pause, 'pause', dump)
 
 def assert_ctxs_committed(kdamonds):
+    ctxs_paused_for_dump = []
+    # pause for safe state dumping
+    for kd in kdamonds.kdamonds:
+        for ctx in kd.contexts:
+            if ctx.pause is False:
+                ctx.pause = True
+                ctxs_paused_for_dump.append(ctx)
+        if len(ctxs_paused_for_dump) > 0:
+            err = kd.commit()
+            if err is not None:
+                print('pause fail (%s)' % err)
+                kdamonds.stop()
+                exit(1)
+
     status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid)
     if err is not None:
         print(err)
         kdamonds.stop()
         exit(1)
 
+    # resume contexts paused for safe state dumping
+    for kd in kdamonds.kdamonds:
+        for ctx in ctxs_paused_for_dump:
+            ctx.pause = False
+        if len(ctxs_paused_for_dump) > 0:
+            err = kd.commit()
+            if err is not None:
+                print('resume fail (%s)' % err)
+                kdamonds.stop()
+                exit(1)
+
+    # restore for comparison
+    for ctx in ctxs_paused_for_dump:
+        ctx.pause = True
+
     ctxs = kdamonds.kdamonds[0].contexts
     dump = status['contexts']
     assert_true(len(ctxs) == len(dump), 'ctxs length', dump)
     for idx, ctx in enumerate(ctxs):
         assert_ctx_committed(ctx, dump[idx])
 
+    # restore for the caller
+    for kd in kdamonds.kdamonds:
+        for ctx in kd.contexts:
+            if ctx in ctxs_paused_for_dump:
+                ctx.pause = False
+
 def main():
     kdamonds = _damon_sysfs.Kdamonds(
             [_damon_sysfs.Kdamond(
@@ -302,6 +337,7 @@ def main():
         print('kdamond start failed: %s' % err)
         exit(1)
     kdamonds.kdamonds[0].contexts[0].targets[1].obsolete = True
+    kdamonds.kdamonds[0].contexts[0].pause = True
     kdamonds.kdamonds[0].commit()
     del kdamonds.kdamonds[0].contexts[0].targets[1]
     assert_ctxs_committed(kdamonds)
-- 
2.47.3
Re: (sashiko) [PATCH 10/10] selftets/damon/sysfs.py: pause DAMON before dumping status
Posted by SeongJae Park 1 week, 6 days ago
Forwarding Sashiko review for doing discussions via mails.

# review url: https://sashiko.dev/#/patchset/20260321181343.93971-11-sj@kernel.org
# start of sashiko.dev inline review
commit 019bf04e4448bbce4f6d092792a1b959f718f39a
Author: SeongJae Park <sj@kernel.org>

selftets/damon/sysfs.py: pause DAMON before dumping status

This isn't a bug, but there is a typo (selftets) in the subject line.

This commit updates the DAMON sysfs selftest to pause DAMON execution
during state dumping. This prevents potential races caused by internal
DAMON changes, such as the addition or removal of regions, while the
dump is ongoing.

> diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py
> --- a/tools/testing/selftests/damon/sysfs.py
> +++ b/tools/testing/selftests/damon/sysfs.py
> @@ -193,18 +193,53 @@ def assert_ctx_committed(ctx, dump):
>      assert_true(dump['pause'] == ctx.pause, 'pause', dump)
>  
>  def assert_ctxs_committed(kdamonds):
> +    ctxs_paused_for_dump = []
> +    # pause for safe state dumping
> +    for kd in kdamonds.kdamonds:
> +        for ctx in kd.contexts:
> +            if ctx.pause is False:
> +                ctx.pause = True
> +                ctxs_paused_for_dump.append(ctx)
> +        if len(ctxs_paused_for_dump) > 0:
> +            err = kd.commit()

Since ctxs_paused_for_dump is initialized outside the outer loop, if the
first kdamond appends a context to this list, won't len(ctxs_paused_for_dump)
> 0 evaluate to true for all subsequent kdamonds?

This seems like it would trigger kd.commit() on every subsequent kdamond
even if none of its contexts were actually modified.

[ ... ]

> +    # resume contexts paused for safe state dumping
> +    for kd in kdamonds.kdamonds:
> +        for ctx in ctxs_paused_for_dump:
> +            ctx.pause = False
> +        if len(ctxs_paused_for_dump) > 0:
> +            err = kd.commit()

Does this inner loop repeatedly modify contexts across all kdamonds on
every outer iteration?

Because ctxs_paused_for_dump is shared across all kdamonds, this appears
to redundantly modify and commit contexts for every kdamond instance.
Could ctxs_paused_for_dump be scoped per-kdamond instead to avoid this?

[ ... ]

> +    # restore for comparison
> +    for ctx in ctxs_paused_for_dump:
> +        ctx.pause = True

Since the context was forced to pause before the status dump, and the
python object is forced to pause right here before the comparison,
doesn't this mean the test only ever asserts that True == True?

It appears this entirely skips verifying whether the originally requested
unpaused state was successfully committed to the kernel before the dump
procedure began. Any kernel bug where the pause parameter is stuck as
True might silently pass this test now.

Should the original state be verified by reading the specific sysfs file
before overriding it?

[ ... ]


# end of sashiko.dev inline review
# review url: https://sashiko.dev/#/patchset/20260321181343.93971-11-sj@kernel.org
#
# hkml [1] generated a draft of this mail.  It can be regenerated
# using below command:
#
#     hkml patch sashiko_dev --for_forwarding \
#             20260321181343.93971-11-sj@kernel.org
#
# [1] https://github.com/sjp38/hackermail
Re: (sashiko) [PATCH 10/10] selftets/damon/sysfs.py: pause DAMON before dumping status
Posted by SeongJae Park 1 week, 6 days ago
TL;DR: the fixup attached below needs to be added.  Unless Andrew picks it up
together with this patch, I will post v2 tomorrow.

On Sat, 21 Mar 2026 13:12:49 -0700 SeongJae Park <sj@kernel.org> wrote:

> Forwarding Sashiko review for doing discussions via mails.
> 
> # review url: https://sashiko.dev/#/patchset/20260321181343.93971-11-sj@kernel.org
> # start of sashiko.dev inline review
> commit 019bf04e4448bbce4f6d092792a1b959f718f39a
> Author: SeongJae Park <sj@kernel.org>
> 
> selftets/damon/sysfs.py: pause DAMON before dumping status
> 
> This isn't a bug, but there is a typo (selftets) in the subject line.

Good eye!  Unless Andrew fixes this up, I will post v2 with the fix tomorrow.

> 
> This commit updates the DAMON sysfs selftest to pause DAMON execution
> during state dumping. This prevents potential races caused by internal
> DAMON changes, such as the addition or removal of regions, while the
> dump is ongoing.
> 
> > diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py
> > --- a/tools/testing/selftests/damon/sysfs.py
> > +++ b/tools/testing/selftests/damon/sysfs.py
> > @@ -193,18 +193,53 @@ def assert_ctx_committed(ctx, dump):
> >      assert_true(dump['pause'] == ctx.pause, 'pause', dump)
> >  
> >  def assert_ctxs_committed(kdamonds):
> > +    ctxs_paused_for_dump = []
> > +    # pause for safe state dumping
> > +    for kd in kdamonds.kdamonds:
> > +        for ctx in kd.contexts:
> > +            if ctx.pause is False:
> > +                ctx.pause = True
> > +                ctxs_paused_for_dump.append(ctx)
> > +        if len(ctxs_paused_for_dump) > 0:
> > +            err = kd.commit()
> 
> Since ctxs_paused_for_dump is initialized outside the outer loop, if the
> first kdamond appends a context to this list, won't len(ctxs_paused_for_dump)
> > 0 evaluate to true for all subsequent kdamonds?
> 
> This seems like it would trigger kd.commit() on every subsequent kdamond
> even if none of its contexts were actually modified.

Good eye.  There is no use case with multiple kdamonds here, but why not make
the code more complete?  I will fix this as shown below.

'''
@@ -201,13 +201,16 @@ def assert_ctx_committed(ctx, dump):

 def assert_ctxs_committed(kdamonds):
     ctxs_paused_for_dump = []
+    kdamonds_paused_for_dump = []
     # pause for safe state dumping
     for kd in kdamonds.kdamonds:
         for ctx in kd.contexts:
             if ctx.pause is False:
                 ctx.pause = True
                 ctxs_paused_for_dump.append(ctx)
-        if len(ctxs_paused_for_dump) > 0:
+                if not kd in kdamonds_paused_for_dump:
+                    kdamonds_paused_for_dump.append(kd)
+        if kd in kdamonds_paused_for_dump:
             err = kd.commit()
             if err is not None:
                 print('pause fail (%s)' % err)
'''

Note that this is not a complete fix.  A similar change will also need to be
made to a later part of this file.


> 
> [ ... ]
> 
> > +    # resume contexts paused for safe state dumping
> > +    for kd in kdamonds.kdamonds:
> > +        for ctx in ctxs_paused_for_dump:
> > +            ctx.pause = False
> > +        if len(ctxs_paused_for_dump) > 0:
> > +            err = kd.commit()
> 
> Does this inner loop repeatedly modify contexts across all kdamonds on
> every outer iteration?
> 
> Because ctxs_paused_for_dump is shared across all kdamonds, this appears
> to redundantly modify and commit contexts for every kdamond instance.
> Could ctxs_paused_for_dump be scoped per-kdamond instead to avoid this?

Good eye!  I will fix this like below.

'''
@@ -221,15 +224,14 @@ def assert_ctxs_committed(kdamonds):
         exit(1)

     # resume contexts paused for safe state dumping
-    for kd in kdamonds.kdamonds:
-        for ctx in ctxs_paused_for_dump:
-            ctx.pause = False
-        if len(ctxs_paused_for_dump) > 0:
-            err = kd.commit()
-            if err is not None:
-                print('resume fail (%s)' % err)
-                kdamonds.stop()
-                exit(1)
+    for ctx in ctxs_paused_for_dump:
+        ctx.pause = False
+    if kd in kdamonds_paused_for_dump:
+        err = kd.commit()
+        if err is not None:
+            print('resume fail (%s)' % err)
+            kdamonds.stop()
+            exit(1)

     # restore for comparison
     for ctx in ctxs_paused_for_dump:
'''

> 
> [ ... ]
> 
> > +    # restore for comparison
> > +    for ctx in ctxs_paused_for_dump:
> > +        ctx.pause = True
> 
> Since the context was forced to pause before the status dump, and the
> python object is forced to pause right here before the comparison,
> doesn't this mean the test only ever asserts that True == True?
> 
> It appears this entirely skips verifying whether the originally requested
> unpaused state was successfully committed to the kernel before the dump
> procedure began. Any kernel bug where the pause parameter is stuck as
> True might silently pass this test now.
> 
> Should the original state be verified by reading the specific sysfs file
> before overriding it?

I agree there is room for improvement.  But at the moment there is no good way
to test the feature that is used for making the test safe.  We do have a kunit
test for the pause commit feature, though.  So I would rather not add a change
for this to this patch.

> 
> [ ... ]
> 
> 
> # end of sashiko.dev inline review
> # review url: https://sashiko.dev/#/patchset/20260321181343.93971-11-sj@kernel.org

So, the fixup attached below is needed for this patch.  Unless Andrew picks it
up together with this patch, I will post v2 tomorrow.


Thanks,
SJ

=== >8 ===
From 90acf3dacea715790622c0dd6cf97c6ed97d1105 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Sat, 21 Mar 2026 13:43:50 -0700
Subject: [PATCH] selftests/damon/sysfs.py: fixup: avoid unnecessary commit()
 and resume setup

As Sashiko suggested and I agreed.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 tools/testing/selftests/damon/sysfs.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py
index 7a4cd915d5bc9..5f00e97f019f4 100755
--- a/tools/testing/selftests/damon/sysfs.py
+++ b/tools/testing/selftests/damon/sysfs.py
@@ -194,13 +194,16 @@ def assert_ctx_committed(ctx, dump):
 
 def assert_ctxs_committed(kdamonds):
     ctxs_paused_for_dump = []
+    kdamonds_paused_for_dump = []
     # pause for safe state dumping
     for kd in kdamonds.kdamonds:
         for ctx in kd.contexts:
             if ctx.pause is False:
                 ctx.pause = True
                 ctxs_paused_for_dump.append(ctx)
-        if len(ctxs_paused_for_dump) > 0:
+                if not kd in kdamonds_paused_for_dump:
+                    kdamonds_paused_for_dump.append(kd)
+        if kd in kdamonds_paused_for_dump:
             err = kd.commit()
             if err is not None:
                 print('pause fail (%s)' % err)
@@ -214,15 +217,14 @@ def assert_ctxs_committed(kdamonds):
         exit(1)
 
     # resume contexts paused for safe state dumping
-    for kd in kdamonds.kdamonds:
-        for ctx in ctxs_paused_for_dump:
-            ctx.pause = False
-        if len(ctxs_paused_for_dump) > 0:
-            err = kd.commit()
-            if err is not None:
-                print('resume fail (%s)' % err)
-                kdamonds.stop()
-                exit(1)
+    for ctx in ctxs_paused_for_dump:
+        ctx.pause = False
+    if kd in kdamonds_paused_for_dump:
+        err = kd.commit()
+        if err is not None:
+            print('resume fail (%s)' % err)
+            kdamonds.stop()
+            exit(1)
 
     # restore for comparison
     for ctx in ctxs_paused_for_dump:
-- 
2.47.3