[PATCH] selftests: ublk: use tmpdir for scratch files and improve relative paths use

Alexander Atanasov posted 1 patch 2 weeks ago
tools/testing/selftests/ublk/test_common.sh     | 8 +++++---
tools/testing/selftests/ublk/test_generic_01.sh | 5 +++--
tools/testing/selftests/ublk/test_generic_02.sh | 5 +++--
tools/testing/selftests/ublk/test_generic_12.sh | 5 +++--
4 files changed, 14 insertions(+), 9 deletions(-)
[PATCH] selftests: ublk: use tmpdir for scratch files and improve relative paths use
Posted by Alexander Atanasov 2 weeks ago
Create a temp dir for temporary files and use it instead of
placing them inside the source tree.
Reference the bpftrace scripts in subdirectories relative to the
test script's source dir.
While there, fix a typo.

Signed-off-by: Alexander Atanasov <alex@zazolabs.com>
---
 tools/testing/selftests/ublk/test_common.sh     | 8 +++++---
 tools/testing/selftests/ublk/test_generic_01.sh | 5 +++--
 tools/testing/selftests/ublk/test_generic_02.sh | 5 +++--
 tools/testing/selftests/ublk/test_generic_12.sh | 5 +++--
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh
index 7ff6ce79d62c..ab1ea5cc904a 100755
--- a/tools/testing/selftests/ublk/test_common.sh
+++ b/tools/testing/selftests/ublk/test_common.sh
@@ -43,7 +43,7 @@ _create_backfile() {
 	old_file="${UBLK_BACKFILES[$index]}"
 	[ -f "$old_file" ] && rm -f "$old_file"
 
-	new_file=$(mktemp ublk_file_"${new_size}"_XXXXX)
+	new_file=$(mktemp $TMPDIR/ublk_file_"${new_size}"_XXXXX)
 	truncate -s "${new_size}" "${new_file}"
 	UBLK_BACKFILES["$index"]="$new_file"
 }
@@ -55,6 +55,7 @@ _remove_files() {
 		[ -f "$file" ] && rm -f "$file"
 	done
 	[ -f "$UBLK_TMP" ] && rm -f "$UBLK_TMP"
+	rm -rf "$TMPDIR"
 }
 
 _create_tmp_dir() {
@@ -119,7 +120,7 @@ _prep_test() {
 	local type=$1
 	shift 1
 	modprobe ublk_drv > /dev/null 2>&1
-	UBLK_TMP=$(mktemp ublk_test_XXXXX)
+	UBLK_TMP=$(mktemp $TMPDIR/ublk_test_XXXXX)
 	[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*"
 }
 
@@ -367,7 +368,7 @@ run_io_and_recover()
 
 	state=$(_recover_ublk_dev -n "$dev_id" "$@")
 	if [ "$state" != "LIVE" ]; then
-		echo "faile to recover to LIVE($state)"
+		echo "failed to recover to LIVE($state)"
 		return 255
 	fi
 
@@ -401,3 +402,4 @@ UBLK_BACKFILES=()
 export UBLK_PROG
 export UBLK_TEST_QUIET
 export UBLK_TEST_SHOW_RESULT
+export TMPDIR=$(mktemp -d ${TMPDIR:-/tmp}/ublktest-dir.XXXXXX)
diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
index 21a31cd5491a..5b06beee91ca 100755
--- a/tools/testing/selftests/ublk/test_generic_01.sh
+++ b/tools/testing/selftests/ublk/test_generic_01.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TDIR=$(cd "$(dirname "$0")" && pwd)
+. $TDIR/test_common.sh
 
 TID="generic_01"
 ERR_CODE=0
@@ -20,7 +21,7 @@ dev_id=$(_add_ublk_dev -t null)
 _check_add_dev $TID $?
 
 dev_t=$(_get_disk_dev_t "$dev_id")
-bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
+bpftrace $TDIR/trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
 btrace_pid=$!
 sleep 2
 
diff --git a/tools/testing/selftests/ublk/test_generic_02.sh b/tools/testing/selftests/ublk/test_generic_02.sh
index 12920768b1a0..1c0e3ab8afc7 100755
--- a/tools/testing/selftests/ublk/test_generic_02.sh
+++ b/tools/testing/selftests/ublk/test_generic_02.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TDIR=$(cd "$(dirname "$0")" && pwd)
+. $TDIR/test_common.sh
 
 TID="generic_02"
 ERR_CODE=0
@@ -20,7 +21,7 @@ dev_id=$(_add_ublk_dev -t null -q 2)
 _check_add_dev $TID $?
 
 dev_t=$(_get_disk_dev_t "$dev_id")
-bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
+bpftrace $TDIR/trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
 btrace_pid=$!
 sleep 2
 
diff --git a/tools/testing/selftests/ublk/test_generic_12.sh b/tools/testing/selftests/ublk/test_generic_12.sh
index b4046201b4d9..d4240ec94827 100755
--- a/tools/testing/selftests/ublk/test_generic_12.sh
+++ b/tools/testing/selftests/ublk/test_generic_12.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TDIR=$(cd "$(dirname "$0")" && pwd)
+. $TDIR/test_common.sh
 
 TID="generic_12"
 ERR_CODE=0
@@ -21,7 +22,7 @@ dev_id=$(_add_ublk_dev -t null -q 4 -d 16 --nthreads $NTHREADS --per_io_tasks)
 _check_add_dev $TID $?
 
 dev_t=$(_get_disk_dev_t "$dev_id")
-bpftrace trace/count_ios_per_tid.bt "$dev_t" > "$UBLK_TMP" 2>&1 &
+bpftrace $TDIR/trace/count_ios_per_tid.bt "$dev_t" > "$UBLK_TMP" 2>&1 &
 btrace_pid=$!
 sleep 2
 
-- 
2.43.0
Re: [PATCH] selftests: ublk: use tmpdir for scratch files and improve relative paths use
Posted by Ming Lei 2 weeks ago
On Fri, Jan 23, 2026 at 11:20:36AM +0000, Alexander Atanasov wrote:
> Create a temp dir for temporary files and use it instead of
> placing them inside source tree.

Many of the temporary files are backing files for file-backed storage
targets; so far the code requires O_DIRECT for them, and their size can
be fairly big.

If the temp dir is on ramfs/tmpfs, this may cause problems for the tests.


Thanks, 
Ming
Re: [PATCH] selftests: ublk: use tmpdir for scratch files and improve relative paths use
Posted by Alexander Atanasov 2 weeks ago
On 23 Jan 2026, at 15:33, Ming Lei <ming.lei@redhat.com> wrote:
> 
> On Fri, Jan 23, 2026 at 11:20:36AM +0000, Alexander Atanasov wrote:
>> Create a temp dir for temporary files and use it instead of
>> placing them inside source tree.
> 
> Many temporary files are backing files of file storage target, so far
> the code requires O_DIRECT, or the size could be a bit big.
> 
> In case of ramfs/tmpfs of temp dir, it may cause problem for tests.
> 

I am aware of the O_DIRECT problem, but you can export a different TMPDIR that has working O_DIRECT.

I use an sshfs mount of the build to run the tests, and that is a problem as well: sshfs/fuse does not
do O_DIRECT either.

I think test_generic_06.sh is the only one that fails due to this (though I still have to investigate).

If O_DIRECT is required by the tests, it may be possible to go through a RAM disk, which does support it,
so it works everywhere.

Another option is to keep working in the source tree as it is now, and just add a variable to specify the working directory -
UBLK_TMPDIR or something.


I get a lot of out of order io - between 0 and 10 on average on my test setup:
tools/testing/selftests/ublk/test_generic_01.sh 
Attached 3 probes
io_out_of_order: exp 564688 actual 564648
io_out_of_order: exp 564648 actual 565584
io_out_of_order: exp 565584 actual 564688
io_out_of_order: exp 565592 actual 564688
io_out_of_order: exp 566328 actual 565592
io_out_of_order: exp 882256 actual 882248
io_out_of_order: exp 883032 actual 882912
io_out_of_order: exp 882912 actual 883040
io_out_of_order: exp 883040 actual 883032


generic_01 : [FAIL]

All requests are there, just reordered. AFAIK blk-mq does not guarantee that requests will be completed in order, so what is the idea behind catching this and
considering it an error? (Latest tree with batch io and the batch io fixes on top of it, if that matters.)


Regards,
Alexander Atanasov
Re: [PATCH] selftests: ublk: use tmpdir for scratch files and improve relative paths use
Posted by Ming Lei 2 weeks ago
On Fri, Jan 23, 2026 at 03:59:31PM +0200, Alexander Atanasov wrote:
> On 23 Jan 2026, at 15:33, Ming Lei <ming.lei@redhat.com> wrote:
> > 
> > On Fri, Jan 23, 2026 at 11:20:36AM +0000, Alexander Atanasov wrote:
> >> Create a temp dir for temporary files and use it instead of
> >> placing them inside source tree.
> > 
> > Many temporary files are backing files of file storage target, so far
> > the code requires O_DIRECT, or the size could be a bit big.
> > 
> > In case of ramfs/tmpfs of temp dir, it may cause problem for tests.
> > 
> 
> I am aware of O_DIRECT problem but you can export different TMPDIR that has working O_DIRECT.

Can you share how to export TMPDIR capable of O_DIRECT?

> 
> I use sshfs mount of the build to run the tests and that is a problem sshfs/fuse does not
> do O_DIRECT too.
> 
> I think test_generic_06.sh is the only one that fails due to this(thou I still have to investigate).
> 
> If O_DIRECT is required by the tests it may be possible to go thru a RAM disk which does support it,
> so it works eveerywhere
> 
> Other option is to preserve working in source tree as it is now, and just add a variable to specify working directory -
> UBLK_TMPDIR or something.
> 
> 
> I get a lot of out of order io - between 0 and 10 on average on my test setup:
> tools/testing/selftests/ublk/test_generic_01.sh 
> Attached 3 probes
> io_out_of_order: exp 564688 actual 564648
> io_out_of_order: exp 564648 actual 565584
> io_out_of_order: exp 565584 actual 564688
> io_out_of_order: exp 565592 actual 564688
> io_out_of_order: exp 566328 actual 565592
> io_out_of_order: exp 882256 actual 882248
> io_out_of_order: exp 883032 actual 882912
> io_out_of_order: exp 882912 actual 883040
> io_out_of_order: exp 883040 actual 883032
> 
> 
> generic_01 : [FAIL]
> 
> All rq-s are there just reordered , AFAIK blk-mq does not guarantee that requests will be completed in order, what’s the idea to catch this and

If there are just 0 ~ 10, it could be fine. But if all are reordered,
something must be wrong. One improvement could be to check whether there
are too many reorders...

Actually, what I am trying to test is that the same order is observed
from both the ublk driver dispatch code path and the ublk target io handling
code path, because io_uring task work scheduling uses an llist, which may
introduce io reordering.

However, that involves ublk kprobe/kfunc trace, which may not be stable,
so I simply check the end-to-end IO order. Sometimes blk-mq IO queue/dispatch
may re-order IO.

I guess the following change may avoid the re-order, but batch IO case may
not be covered:

diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
index 21a31cd5491a..5805da4c84c5 100755
--- a/tools/testing/selftests/ublk/test_generic_01.sh
+++ b/tools/testing/selftests/ublk/test_generic_01.sh
@@ -29,14 +29,8 @@ if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
        exit "$UBLK_SKIP_CODE"
 fi

-# run fio over this ublk disk
-fio --name=write_seq \
-    --filename=/dev/ublkb"${dev_id}" \
-    --ioengine=libaio --iodepth=16 \
-    --rw=write \
-    --size=512M \
-    --direct=1 \
-    --bs=4k > /dev/null 2>&1
+taskset -c 0 dd if=/dev/zero of=/dev/ublkb"${dev_id}" bs=1M count=256 oflag=direct > /dev/null 2>&1
+


> consider it an error? (Latest tree with batch io and batch io fixes on top of if that matters)

I have never observed a generic_01 failure in my test VM or on hardware.

My kernel config is based on Fedora, maybe scheduler config option makes the difference.



Thanks,
Ming

Re: [PATCH] selftests: ublk: use tmpdir for scratch files and improve relative paths use
Posted by Alexander Atanasov 2 weeks ago

> On 23 Jan 2026, at 16:33, Ming Lei <ming.lei@redhat.com> wrote:
> 
> On Fri, Jan 23, 2026 at 03:59:31PM +0200, Alexander Atanasov wrote:
>> On 23 Jan 2026, at 15:33, Ming Lei <ming.lei@redhat.com> wrote:
>>> 
>>> On Fri, Jan 23, 2026 at 11:20:36AM +0000, Alexander Atanasov wrote:
>>>> Create a temp dir for temporary files and use it instead of
>>>> placing them inside source tree.
>>> 
>>> Many temporary files are backing files of file storage target, so far
>>> the code requires O_DIRECT, or the size could be a bit big.
>>> 
>>> In case of ramfs/tmpfs of temp dir, it may cause problem for tests.
>>> 
>> 
>> I am aware of O_DIRECT problem but you can export different TMPDIR that has working O_DIRECT.
> 
> Can you share how to export TMPDIR capable of O_DIRECT?


+export TMPDIR=$(mktemp -d ${TMPDIR:-/tmp}/ublktest-dir.XXXXXX)

I made the tests run in their own temp dir, which is created under the already set TMPDIR or,
if TMPDIR is not set, under /tmp by default.

export TMPDIR=/path/to/odirect/capable
Set it before running, and the tests will run in:
/path/to/odirect/capable/ublktest-dir.XXXXXX


> 
>> 
>> I use sshfs mount of the build to run the tests and that is a problem sshfs/fuse does not
>> do O_DIRECT too.
>> 
>> I think test_generic_06.sh is the only one that fails due to this(thou I still have to investigate).
>> 
>> If O_DIRECT is required by the tests it may be possible to go thru a RAM disk which does support it,
>> so it works eveerywhere
>> 
>> Other option is to preserve working in source tree as it is now, and just add a variable to specify working directory -
>> UBLK_TMPDIR or something.
>> 
>> 
>> I get a lot of out of order io - between 0 and 10 on average on my test setup:
>> tools/testing/selftests/ublk/test_generic_01.sh 
>> Attached 3 probes
>> io_out_of_order: exp 564688 actual 564648
>> io_out_of_order: exp 564648 actual 565584
>> io_out_of_order: exp 565584 actual 564688
>> io_out_of_order: exp 565592 actual 564688
>> io_out_of_order: exp 566328 actual 565592
>> io_out_of_order: exp 882256 actual 882248
>> io_out_of_order: exp 883032 actual 882912
>> io_out_of_order: exp 882912 actual 883040
>> io_out_of_order: exp 883040 actual 883032
>> 
>> 
>> generic_01 : [FAIL]
>> 
>> All rq-s are there just reordered , AFAIK blk-mq does not guarantee that requests will be completed in order, what’s the idea to catch this and
> 
> If there is just 0 ~ 10, it could be fine. But if all are reorderd,
> something must be wrong. One improvement could be check if there is too
> many reorder...
> 
> Actually what I am trying to test is to make sure same order is observed
> from both ublk driver dispatch code path and ublk target io handling code
> path, because io_uring task work schedule uses llist, which may introduce io
> reorder.

There are for sure other places where reordering can be introduced, so the code should be ready for it and expect
it. (For my case, see below.) Is preserving the order required for some reason for ublk?

> 
> However, that involves ublk kprobe/kfunc trace, which may not be stable,
> so I simply check the end-to-end IO order. Sometimes blk-mq IO queue/dispatch
> may re-order IO.
> 
> I guess the following change may avoid the re-order, but batch IO case may
> not be covered:
> 
> diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
> index 21a31cd5491a..5805da4c84c5 100755
> --- a/tools/testing/selftests/ublk/test_generic_01.sh
> +++ b/tools/testing/selftests/ublk/test_generic_01.sh
> @@ -29,14 +29,8 @@ if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
>        exit "$UBLK_SKIP_CODE"
> fi
> 
> -# run fio over this ublk disk
> -fio --name=write_seq \
> -    --filename=/dev/ublkb"${dev_id}" \
> -    --ioengine=libaio --iodepth=16 \
> -    --rw=write \
> -    --size=512M \
> -    --direct=1 \
> -    --bs=4k > /dev/null 2>&1
> +taskset -c 0 dd if=/dev/zero of=/dev/ublkb"${dev_id}" bs=1M count=256 oflag=direct > /dev/null 2>&1
> +
> 
> 
>> consider it an error? (Latest tree with batch io and batch io fixes on top of if that matters)
> 
> Never observe generic_01 failure in my test VM and hardware.
> 
> My kernel config is based on Fedora, maybe scheduler config option makes the difference.

Fedora 43 default config with some debugging options enabled, but no changes in schedulers.
The test VM storage is on a networked NAS over iSCSI - both boxes, the VM host and the NAS, have two NICs.
I get the errors when I load the network, so I believe the requests really complete out of
order due to the network in my case. All tests that have the bpftrace check fail on occasion.


Regards,
Alexander Atanasov
Re: [PATCH] selftests: ublk: io-reorder triggered in test_generic_01.sh
Posted by Ming Lei 1 week, 5 days ago
On Fri, Jan 23, 2026 at 05:00:33PM +0200, Alexander Atanasov wrote:
> 
> 
> > On 23 Jan 2026, at 16:33, Ming Lei <ming.lei@redhat.com> wrote:
> > 
> > On Fri, Jan 23, 2026 at 03:59:31PM +0200, Alexander Atanasov wrote:
> >> On 23 Jan 2026, at 15:33, Ming Lei <ming.lei@redhat.com> wrote:
> >>> 
> >>> On Fri, Jan 23, 2026 at 11:20:36AM +0000, Alexander Atanasov wrote:
> >>>> Create a temp dir for temporary files and use it instead of
> >>>> placing them inside source tree.
> >>> 
> >>> Many temporary files are backing files of file storage target, so far
> >>> the code requires O_DIRECT, or the size could be a bit big.
> >>> 
> >>> In case of ramfs/tmpfs of temp dir, it may cause problem for tests.
> >>> 
> >> 
> >> I am aware of O_DIRECT problem but you can export different TMPDIR that has working O_DIRECT.
> > 
> > Can you share how to export TMPDIR capable of O_DIRECT?
> 
> 
> +export TMPDIR=$(mktemp -d ${TMPDIR:-/tmp}/ublktest-dir.XXXXXX)
> 
> I made the tests to run in own TMPDIR. Which is under already set TMPDIR or
> if TMPDIR is not set it is defaults to  /tmp.
> 
> export TMPDIR=/path/to/odirect/capable
> Before running and tests will run in:
> /path/to/odirect/capable/ublktest-dir.XXXXXX
> 
> 
> > 
> >> 
> >> I use sshfs mount of the build to run the tests and that is a problem sshfs/fuse does not
> >> do O_DIRECT too.
> >> 
> >> I think test_generic_06.sh is the only one that fails due to this(thou I still have to investigate).
> >> 
> >> If O_DIRECT is required by the tests it may be possible to go thru a RAM disk which does support it,
> >> so it works eveerywhere
> >> 
> >> Other option is to preserve working in source tree as it is now, and just add a variable to specify working directory -
> >> UBLK_TMPDIR or something.
> >> 
> >> 
> >> I get a lot of out of order io - between 0 and 10 on average on my test setup:
> >> tools/testing/selftests/ublk/test_generic_01.sh 
> >> Attached 3 probes
> >> io_out_of_order: exp 564688 actual 564648
> >> io_out_of_order: exp 564648 actual 565584
> >> io_out_of_order: exp 565584 actual 564688
> >> io_out_of_order: exp 565592 actual 564688
> >> io_out_of_order: exp 566328 actual 565592
> >> io_out_of_order: exp 882256 actual 882248
> >> io_out_of_order: exp 883032 actual 882912
> >> io_out_of_order: exp 882912 actual 883040
> >> io_out_of_order: exp 883040 actual 883032
> >> 
> >> 
> >> generic_01 : [FAIL]
> >> 
> >> All rq-s are there just reordered , AFAIK blk-mq does not guarantee that requests will be completed in order, what’s the idea to catch this and
> > 
> > If there is just 0 ~ 10, it could be fine. But if all are reorderd,
> > something must be wrong. One improvement could be check if there is too
> > many reorder...
> > 
> > Actually what I am trying to test is to make sure same order is observed
> > from both ublk driver dispatch code path and ublk target io handling code
> > path, because io_uring task work schedule uses llist, which may introduce io
> > reorder.
> 
> There are for sure other places where a reordering can be introduced, so the code should be ready and expecting 
> It. (For my case see bellow) Is preserving the order required for some reason for ublk?
> 
> > 
> > However, that involves ublk kprobe/kfunc trace, which may not be stable,
> > so I simply check the end-to-end IO order. Sometimes blk-mq IO queue/dispatch
> > may re-order IO.
> > 
> > I guess the following change may avoid the re-order, but batch IO case may
> > not be covered:
> > 
> > diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
> > index 21a31cd5491a..5805da4c84c5 100755
> > --- a/tools/testing/selftests/ublk/test_generic_01.sh
> > +++ b/tools/testing/selftests/ublk/test_generic_01.sh
> > @@ -29,14 +29,8 @@ if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
> >        exit "$UBLK_SKIP_CODE"
> > fi
> > 
> > -# run fio over this ublk disk
> > -fio --name=write_seq \
> > -    --filename=/dev/ublkb"${dev_id}" \
> > -    --ioengine=libaio --iodepth=16 \
> > -    --rw=write \
> > -    --size=512M \
> > -    --direct=1 \
> > -    --bs=4k > /dev/null 2>&1
> > +taskset -c 0 dd if=/dev/zero of=/dev/ublkb"${dev_id}" bs=1M count=256 oflag=direct > /dev/null 2>&1
> > +
> > 
> > 
> >> consider it an error? (Latest tree with batch io and batch io fixes on top of if that matters)
> > 
> > Never observe generic_01 failure in my test VM and hardware.
> > 
> > My kernel config is based on Fedora, maybe scheduler config option makes the difference.
> 
> Fedora 43 default config with some debugging options enabled, but no changes in schedulers.
> Test VM storage is on a networked NAS over iSCSI - both boxes VM host and NAS have two NICs,
> I get the errors when I load the network. So I believe the requests really complete out of 
> order due to the network in my case. All tests that have the bpftrace check fail on occasion.

Can you test the following patch and see if re-order still can happen? 


diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
index 26cf3c7ceeb5..26d5e52ece29 100755
--- a/tools/testing/selftests/ublk/test_generic_01.sh
+++ b/tools/testing/selftests/ublk/test_generic_01.sh
@@ -13,7 +13,7 @@ if ! _have_program fio; then
 	exit "$UBLK_SKIP_CODE"
 fi
 
-_prep_test "null" "sequential io order"
+_prep_test "null" "ublk dispatch won't reorder IO"
 
 dev_id=$(_add_ublk_dev -t null)
 _check_add_dev $TID $?
@@ -39,9 +39,13 @@ fio --name=write_seq \
 ERR_CODE=$?
 kill "$btrace_pid"
 wait
-if grep -q "io_out_of_order" "$UBLK_TMP"; then
-	cat "$UBLK_TMP"
+
+# Check for out-of-order completions detected by bpftrace
+if grep -q "^out_of_order:" "$UBLK_TMP"; then
+	echo "I/O reordering detected:"
+	grep "^out_of_order:" "$UBLK_TMP"
 	ERR_CODE=255
 fi
+
 _cleanup_test "null"
 _show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt
index b2f60a92b118..60ac40e66606 100644
--- a/tools/testing/selftests/ublk/trace/seq_io.bt
+++ b/tools/testing/selftests/ublk/trace/seq_io.bt
@@ -2,23 +2,45 @@
 	$1: 	dev_t
 	$2: 	RWBS
 	$3:     strlen($2)
+
+	Track request order between block_io_start and block_rq_complete.
+	For each request, record its start sequence number and verify
+	completions happen in the same order.
 */
+
 BEGIN {
-	@last_rw[$1, str($2)] = (uint64)0;
+	@start_seq = (uint64)0;
+	@complete_seq = (uint64)0;
+	@out_of_order = (uint64)0;
+}
+
+tracepoint:block:block_io_start
+{
+	if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
+		@start_order[args.sector] = @start_seq;
+		@start_seq = @start_seq + 1;
+	}
 }
+
 tracepoint:block:block_rq_complete
 {
-	$dev = $1;
 	if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
-		$last = @last_rw[$dev, str($2)];
-		if ((uint64)args.sector != $last) {
-			printf("io_out_of_order: exp %llu actual %llu\n",
-				args.sector, $last);
+		$expected_order = @start_order[args.sector];
+		if ($expected_order != @complete_seq) {
+			printf("out_of_order: sector %llu started at seq %llu but completed at seq %llu\n",
+				args.sector, $expected_order, @complete_seq);
+			@out_of_order = @out_of_order + 1;
 		}
-		@last_rw[$dev, str($2)] = (args.sector + args.nr_sector);
+		delete(@start_order[args.sector]);
+		@complete_seq = @complete_seq + 1;
 	}
 }
 
 END {
-	clear(@last_rw);
+	printf("total_start: %llu total_complete: %llu out_of_order: %llu\n",
+		@start_seq, @complete_seq, @out_of_order);
+	clear(@start_order);
+	clear(@start_seq);
+	clear(@complete_seq);
+	clear(@out_of_order);
 }

Thanks,
Ming

Re: [PATCH] selftests: ublk: io-reorder triggered in test_generic_01.sh
Posted by Alexander Atanasov 1 week, 5 days ago
On 25.01.26 17:28, Ming Lei wrote:
> On Fri, Jan 23, 2026 at 05:00:33PM +0200, Alexander Atanasov wrote:
>>
>>

[snip]

> Can you test the following patch and see if re-order still can happen?
> 
> 
> diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
> index 26cf3c7ceeb5..26d5e52ece29 100755
> --- a/tools/testing/selftests/ublk/test_generic_01.sh
> +++ b/tools/testing/selftests/ublk/test_generic_01.sh
> @@ -13,7 +13,7 @@ if ! _have_program fio; then
>   	exit "$UBLK_SKIP_CODE"
>   fi
>   
> -_prep_test "null" "sequential io order"
> +_prep_test "null" "ublk dispatch won't reorder IO"
>   
>   dev_id=$(_add_ublk_dev -t null)
>   _check_add_dev $TID $?
> @@ -39,9 +39,13 @@ fio --name=write_seq \
>   ERR_CODE=$?
>   kill "$btrace_pid"
>   wait
> -if grep -q "io_out_of_order" "$UBLK_TMP"; then
> -	cat "$UBLK_TMP"
> +
> +# Check for out-of-order completions detected by bpftrace
> +if grep -q "^out_of_order:" "$UBLK_TMP"; then
> +	echo "I/O reordering detected:"
> +	grep "^out_of_order:" "$UBLK_TMP"
>   	ERR_CODE=255
>   fi
> +
>   _cleanup_test "null"
>   _show_result $TID $ERR_CODE
> diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt
> index b2f60a92b118..60ac40e66606 100644
> --- a/tools/testing/selftests/ublk/trace/seq_io.bt
> +++ b/tools/testing/selftests/ublk/trace/seq_io.bt
> @@ -2,23 +2,45 @@
>   	$1: 	dev_t
>   	$2: 	RWBS
>   	$3:     strlen($2)
> +
> +	Track request order between block_io_start and block_rq_complete.
> +	For each request, record its start sequence number and verify
> +	completions happen in the same order.
>   */
> +
>   BEGIN {
> -	@last_rw[$1, str($2)] = (uint64)0;
> +	@start_seq = (uint64)0;
> +	@complete_seq = (uint64)0;
> +	@out_of_order = (uint64)0;
> +}
> +
> +tracepoint:block:block_io_start
> +{
> +	if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
> +		@start_order[args.sector] = @start_seq;
> +		@start_seq = @start_seq + 1;
> +	}
>   }
> +
>   tracepoint:block:block_rq_complete
>   {
> -	$dev = $1;
>   	if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
> -		$last = @last_rw[$dev, str($2)];
> -		if ((uint64)args.sector != $last) {
> -			printf("io_out_of_order: exp %llu actual %llu\n",
> -				args.sector, $last);
> +		$expected_order = @start_order[args.sector];
> +		if ($expected_order != @complete_seq) {
> +			printf("out_of_order: sector %llu started at seq %llu but completed at seq %llu\n",
> +				args.sector, $expected_order, @complete_seq);
> +			@out_of_order = @out_of_order + 1;
>   		}
> -		@last_rw[$dev, str($2)] = (args.sector + args.nr_sector);
> +		delete(@start_order[args.sector]);
> +		@complete_seq = @complete_seq + 1;
>   	}
>   }
>   
>   END {
> -	clear(@last_rw);
> +	printf("total_start: %llu total_complete: %llu out_of_order: %llu\n",
> +		@start_seq, @complete_seq, @out_of_order);
> +	clear(@start_order);
> +	clear(@start_seq);
> +	clear(@complete_seq);
> +	clear(@out_of_order);
>   }


First run after reboot always fails:

./test_generic_01.sh
ublk selftest: generic_01 starting at 2026-01-25T19:08:47+02:00
I/O reordering detected:
out_of_order: sector 112040 started at seq 88 but completed at seq 0
out_of_order: sector 112048 started at seq 89 but completed at seq 1
out_of_order: sector 112056 started at seq 90 but completed at seq 2
out_of_order: sector 112064 started at seq 91 but completed at seq 3
out_of_order: sector 112072 started at seq 92 but completed at seq 4
out_of_order: sector 112080 started at seq 93 but completed at seq 5
out_of_order: sector 112088 started at seq 94 but completed at seq 6
out_of_order: sector 112096 started at seq 95 but completed at seq 7
out_of_order: sector 112104 started at seq 96 but completed at seq 8
out_of_order: sector 112112 started at seq 97 but completed at seq 9
out_of_order: sector 112120 started at seq 98 but completed at seq 10
out_of_order: sector 112128 started at seq 99 but completed at seq 11

[snip]

out_of_order: sector 1048528 started at seq 117149 but completed at seq 117061
out_of_order: sector 1048536 started at seq 117150 but completed at seq 117062
out_of_order: sector 1048544 started at seq 117151 but completed at seq 117063
out_of_order: sector 1048552 started at seq 117152 but completed at seq 117064
out_of_order: sector 1048560 started at seq 117153 but completed at seq 117065
out_of_order: sector 1048568 started at seq 117154 but completed at seq 117066
ublk selftest: generic_01 done at 2026-01-25T19:08:54+02:00
generic_01 : [FAIL]

Subsequent runs are okay:

./test_generic_01.sh
ublk selftest: generic_01 starting at 2026-01-25T20:21:31+02:00
ublk selftest: generic_01 done at 2026-01-25T20:21:38+02:00
generic_01 : [PASS]

I went through the reboot cycle several times - it repeats every time.

I tried with and without setting the affinity, and I observed no difference.

I will do some more testing, but it was easier to reproduce before that
patch.

-- 
have fun,
alex
Re: [PATCH] selftests: ublk: io-reorder triggered in test_generic_01.sh
Posted by Ming Lei 1 week, 5 days ago
On Sun, Jan 25, 2026 at 08:35:20PM +0200, Alexander Atanasov wrote:
> On 25.01.26 17:28, Ming Lei wrote:
> > On Fri, Jan 23, 2026 at 05:00:33PM +0200, Alexander Atanasov wrote:
> > > 
> > > 
> 
> [snip]
> 
> > Can you test the following patch and see if re-order still can happen?
> > 
> > 
> > diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
> > index 26cf3c7ceeb5..26d5e52ece29 100755
> > --- a/tools/testing/selftests/ublk/test_generic_01.sh
> > +++ b/tools/testing/selftests/ublk/test_generic_01.sh
> > @@ -13,7 +13,7 @@ if ! _have_program fio; then
> >   	exit "$UBLK_SKIP_CODE"
> >   fi
> > -_prep_test "null" "sequential io order"
> > +_prep_test "null" "ublk dispatch won't reorder IO"
> >   dev_id=$(_add_ublk_dev -t null)
> >   _check_add_dev $TID $?
> > @@ -39,9 +39,13 @@ fio --name=write_seq \
> >   ERR_CODE=$?
> >   kill "$btrace_pid"
> >   wait
> > -if grep -q "io_out_of_order" "$UBLK_TMP"; then
> > -	cat "$UBLK_TMP"
> > +
> > +# Check for out-of-order completions detected by bpftrace
> > +if grep -q "^out_of_order:" "$UBLK_TMP"; then
> > +	echo "I/O reordering detected:"
> > +	grep "^out_of_order:" "$UBLK_TMP"
> >   	ERR_CODE=255
> >   fi
> > +
> >   _cleanup_test "null"
> >   _show_result $TID $ERR_CODE
> > diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt
> > index b2f60a92b118..60ac40e66606 100644
> > --- a/tools/testing/selftests/ublk/trace/seq_io.bt
> > +++ b/tools/testing/selftests/ublk/trace/seq_io.bt
> > @@ -2,23 +2,45 @@
> >   	$1: 	dev_t
> >   	$2: 	RWBS
> >   	$3:     strlen($2)
> > +
> > +	Track request order between block_io_start and block_rq_complete.
> > +	For each request, record its start sequence number and verify
> > +	completions happen in the same order.
> >   */
> > +
> >   BEGIN {
> > -	@last_rw[$1, str($2)] = (uint64)0;
> > +	@start_seq = (uint64)0;
> > +	@complete_seq = (uint64)0;
> > +	@out_of_order = (uint64)0;
> > +}
> > +
> > +tracepoint:block:block_io_start
> > +{
> > +	if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
> > +		@start_order[args.sector] = @start_seq;
> > +		@start_seq = @start_seq + 1;
> > +	}
> >   }
> > +
> >   tracepoint:block:block_rq_complete
> >   {
> > -	$dev = $1;
> >   	if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
> > -		$last = @last_rw[$dev, str($2)];
> > -		if ((uint64)args.sector != $last) {
> > -			printf("io_out_of_order: exp %llu actual %llu\n",
> > -				args.sector, $last);
> > +		$expected_order = @start_order[args.sector];
> > +		if ($expected_order != @complete_seq) {
> > +			printf("out_of_order: sector %llu started at seq %llu but completed at seq %llu\n",
> > +				args.sector, $expected_order, @complete_seq);
> > +			@out_of_order = @out_of_order + 1;
> >   		}
> > -		@last_rw[$dev, str($2)] = (args.sector + args.nr_sector);
> > +		delete(@start_order[args.sector]);
> > +		@complete_seq = @complete_seq + 1;
> >   	}
> >   }
> >   END {
> > -	clear(@last_rw);
> > +	printf("total_start: %llu total_complete: %llu out_of_order: %llu\n",
> > +		@start_seq, @complete_seq, @out_of_order);
> > +	clear(@start_order);
> > +	clear(@start_seq);
> > +	clear(@complete_seq);
> > +	clear(@out_of_order);
> >   }
> 
> 
> First run after reboot always fails:
> 
> ./test_generic_01.sh
> ublk selftest: generic_01 starting at 2026-01-25T19:08:47+02:00
> I/O reordering detected:
> out_of_order: sector 112040 started at seq 88 but completed at seq 0
> out_of_order: sector 112048 started at seq 89 but completed at seq 1
> out_of_order: sector 112056 started at seq 90 but completed at seq 2
> out_of_order: sector 112064 started at seq 91 but completed at seq 3
> out_of_order: sector 112072 started at seq 92 but completed at seq 4
> out_of_order: sector 112080 started at seq 93 but completed at seq 5
> out_of_order: sector 112088 started at seq 94 but completed at seq 6
> out_of_order: sector 112096 started at seq 95 but completed at seq 7
> out_of_order: sector 112104 started at seq 96 but completed at seq 8
> out_of_order: sector 112112 started at seq 97 but completed at seq 9
> out_of_order: sector 112120 started at seq 98 but completed at seq 10
> out_of_order: sector 112128 started at seq 99 but completed at seq 11

On the first bpftrace attachment, fio may be started between the attaching
of the block_io_start and block_rq_complete probes. Maybe the following
patch can solve it:


diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
index 26cf3c7ceeb5..15cea9e55e1c 100755
--- a/tools/testing/selftests/ublk/test_generic_01.sh
+++ b/tools/testing/selftests/ublk/test_generic_01.sh
@@ -13,7 +13,7 @@ if ! _have_program fio; then
 	exit "$UBLK_SKIP_CODE"
 fi
 
-_prep_test "null" "sequential io order"
+_prep_test "null" "ublk dispatch won't reorder IO"
 
 dev_id=$(_add_ublk_dev -t null)
 _check_add_dev $TID $?
@@ -21,15 +21,20 @@ _check_add_dev $TID $?
 dev_t=$(_get_disk_dev_t "$dev_id")
 bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
 btrace_pid=$!
-sleep 2
 
-if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
+# Wait for bpftrace probes to be attached (BEGIN block prints BPFTRACE_READY)
+for _ in $(seq 100); do
+	grep -q "BPFTRACE_READY" "$UBLK_TMP" 2>/dev/null && break
+	sleep 0.1
+done
+
+if ! kill -0 "$btrace_pid" 2>/dev/null; then
 	_cleanup_test "null"
 	exit "$UBLK_SKIP_CODE"
 fi
 
-# run fio over this ublk disk
-fio --name=write_seq \
+# run fio over this ublk disk (pinned to CPU 0)
+taskset -c 0 fio --name=write_seq \
     --filename=/dev/ublkb"${dev_id}" \
     --ioengine=libaio --iodepth=16 \
     --rw=write \
@@ -39,9 +44,13 @@ fio --name=write_seq \
 ERR_CODE=$?
 kill "$btrace_pid"
 wait
-if grep -q "io_out_of_order" "$UBLK_TMP"; then
-	cat "$UBLK_TMP"
+
+# Check for out-of-order completions detected by bpftrace
+if grep -q "^out_of_order:" "$UBLK_TMP"; then
+	echo "I/O reordering detected:"
+	grep "^out_of_order:" "$UBLK_TMP"
 	ERR_CODE=255
 fi
+
 _cleanup_test "null"
 _show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt
index b2f60a92b118..8ebe13d24c67 100644
--- a/tools/testing/selftests/ublk/trace/seq_io.bt
+++ b/tools/testing/selftests/ublk/trace/seq_io.bt
@@ -2,23 +2,64 @@
 	$1: 	dev_t
 	$2: 	RWBS
 	$3:     strlen($2)
+
+	Track request order between block_io_start and block_rq_complete.
+	For each request, record its start sequence number and verify
+	completions happen in the same order.
+
+	Sequence starts at 1 so that 0 means "never seen" (bpftrace maps
+	return 0 for missing keys). On first valid completion, sync
+	complete_seq to handle probe attachment races.
+
+	block_rq_complete is listed first to ensure it's attached before
+	block_io_start, reducing the window for missed completions.
 */
+
 BEGIN {
-	@last_rw[$1, str($2)] = (uint64)0;
+	@start_seq = (uint64)1;
+	@complete_seq = (uint64)0;
+	@out_of_order = (uint64)0;
+	/* Initialize map type with dummy entry */
+	@start_order[0] = (uint64)0;
+	delete(@start_order[0]);
+	printf("BPFTRACE_READY\n");
 }
+
 tracepoint:block:block_rq_complete
 {
-	$dev = $1;
 	if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
-		$last = @last_rw[$dev, str($2)];
-		if ((uint64)args.sector != $last) {
-			printf("io_out_of_order: exp %llu actual %llu\n",
-				args.sector, $last);
+		$expected = @start_order[args.sector];
+		if ($expected > 0) {
+			/* Sync complete_seq on first valid completion */
+			if (@complete_seq == 0) {
+				@complete_seq = $expected;
+			}
+
+			if ($expected != @complete_seq) {
+				printf("out_of_order: sector %llu started at seq %llu but completed at seq %llu\n",
+					args.sector, $expected, @complete_seq);
+				@out_of_order = @out_of_order + 1;
+			}
+			delete(@start_order[args.sector]);
+			@complete_seq = @complete_seq + 1;
 		}
-		@last_rw[$dev, str($2)] = (args.sector + args.nr_sector);
+		/* $expected == 0 means we never saw this sector start, skip it */
+	}
+}
+
+tracepoint:block:block_io_start
+{
+	if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
+		@start_order[args.sector] = @start_seq;
+		@start_seq = @start_seq + 1;
 	}
 }
 
 END {
-	clear(@last_rw);
+	printf("total_start: %llu total_complete: %llu out_of_order: %llu\n",
+		@start_seq - 1, @complete_seq, @out_of_order);
+	clear(@start_order);
+	clear(@start_seq);
+	clear(@complete_seq);
+	clear(@out_of_order);
 }



Thanks,
Ming
Re: [PATCH] selftests: ublk: io-reorder triggered in test_generic_01.sh
Posted by Alexander Atanasov 1 week, 4 days ago
Hello,

> On 26 Jan 2026, at 3:27, Ming Lei <ming.lei@redhat.com> wrote:
> 
> On Sun, Jan 25, 2026 at 08:35:20PM +0200, Alexander Atanasov wrote:
>> On 25.01.26 17:28, Ming Lei wrote:
>>> On Fri, Jan 23, 2026 at 05:00:33PM +0200, Alexander Atanasov wrote:
>>>> 
>>>> 
>> 
>> [snip]
>> 
>>> Can you test the following patch and see if re-order still can happen?
>>> 
>>> 
>>> diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
>>> index 26cf3c7ceeb5..26d5e52ece29 100755
>>> --- a/tools/testing/selftests/ublk/test_generic_01.sh
>>> +++ b/tools/testing/selftests/ublk/test_generic_01.sh
>>> @@ -13,7 +13,7 @@ if ! _have_program fio; then
>>>   exit "$UBLK_SKIP_CODE"
>>>  fi
>>> -_prep_test "null" "sequential io order"
>>> +_prep_test "null" "ublk dispatch won't reorder IO"
>>>  dev_id=$(_add_ublk_dev -t null)
>>>  _check_add_dev $TID $?
>>> @@ -39,9 +39,13 @@ fio --name=write_seq \
>>>  ERR_CODE=$?
>>>  kill "$btrace_pid"
>>>  wait
>>> -if grep -q "io_out_of_order" "$UBLK_TMP"; then
>>> - cat "$UBLK_TMP"
>>> +
>>> +# Check for out-of-order completions detected by bpftrace
>>> +if grep -q "^out_of_order:" "$UBLK_TMP"; then
>>> + echo "I/O reordering detected:"
>>> + grep "^out_of_order:" "$UBLK_TMP"
>>>   ERR_CODE=255
>>>  fi
>>> +
>>>  _cleanup_test "null"
>>>  _show_result $TID $ERR_CODE
>>> diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt
>>> index b2f60a92b118..60ac40e66606 100644
>>> --- a/tools/testing/selftests/ublk/trace/seq_io.bt
>>> +++ b/tools/testing/selftests/ublk/trace/seq_io.bt
>>> @@ -2,23 +2,45 @@
>>>   $1:  dev_t
>>>   $2:  RWBS
>>>   $3:     strlen($2)
>>> +
>>> + Track request order between block_io_start and block_rq_complete.
>>> + For each request, record its start sequence number and verify
>>> + completions happen in the same order.
>>>  */
>>> +
>>>  BEGIN {
>>> - @last_rw[$1, str($2)] = (uint64)0;
>>> + @start_seq = (uint64)0;
>>> + @complete_seq = (uint64)0;
>>> + @out_of_order = (uint64)0;
>>> +}
>>> +
>>> +tracepoint:block:block_io_start
>>> +{
>>> + if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
>>> + @start_order[args.sector] = @start_seq;
>>> + @start_seq = @start_seq + 1;
>>> + }
>>>  }
>>> +
>>>  tracepoint:block:block_rq_complete
>>>  {
>>> - $dev = $1;
>>>   if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
>>> - $last = @last_rw[$dev, str($2)];
>>> - if ((uint64)args.sector != $last) {
>>> - printf("io_out_of_order: exp %llu actual %llu\n",
>>> - args.sector, $last);
>>> + $expected_order = @start_order[args.sector];
>>> + if ($expected_order != @complete_seq) {
>>> + printf("out_of_order: sector %llu started at seq %llu but completed at seq %llu\n",
>>> + args.sector, $expected_order, @complete_seq);
>>> + @out_of_order = @out_of_order + 1;
>>>   }
>>> - @last_rw[$dev, str($2)] = (args.sector + args.nr_sector);
>>> + delete(@start_order[args.sector]);
>>> + @complete_seq = @complete_seq + 1;
>>>   }
>>>  }
>>>  END {
>>> - clear(@last_rw);
>>> + printf("total_start: %llu total_complete: %llu out_of_order: %llu\n",
>>> + @start_seq, @complete_seq, @out_of_order);
>>> + clear(@start_order);
>>> + clear(@start_seq);
>>> + clear(@complete_seq);
>>> + clear(@out_of_order);
>>>  }
>> 
>> 
>> First run after reboot always fails:
>> 
>> ./test_generic_01.sh
>> ublk selftest: generic_01 starting at 2026-01-25T19:08:47+02:00
>> I/O reordering detected:
>> out_of_order: sector 112040 started at seq 88 but completed at seq 0
>> out_of_order: sector 112048 started at seq 89 but completed at seq 1
>> out_of_order: sector 112056 started at seq 90 but completed at seq 2
>> out_of_order: sector 112064 started at seq 91 but completed at seq 3
>> out_of_order: sector 112072 started at seq 92 but completed at seq 4
>> out_of_order: sector 112080 started at seq 93 but completed at seq 5
>> out_of_order: sector 112088 started at seq 94 but completed at seq 6
>> out_of_order: sector 112096 started at seq 95 but completed at seq 7
>> out_of_order: sector 112104 started at seq 96 but completed at seq 8
>> out_of_order: sector 112112 started at seq 97 but completed at seq 9
>> out_of_order: sector 112120 started at seq 98 but completed at seq 10
>> out_of_order: sector 112128 started at seq 99 but completed at seq 11
> 
> During the first bpftrace attachment, fio may start issuing I/O in the
> window between attaching the block_io_start probe and attaching the
> block_rq_complete probe. Maybe the following patch can solve it:
> 
> 
> diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
> index 26cf3c7ceeb5..15cea9e55e1c 100755
> --- a/tools/testing/selftests/ublk/test_generic_01.sh
> +++ b/tools/testing/selftests/ublk/test_generic_01.sh
> @@ -13,7 +13,7 @@ if ! _have_program fio; then
> exit "$UBLK_SKIP_CODE"
> fi
> 
> -_prep_test "null" "sequential io order"
> +_prep_test "null" "ublk dispatch won't reorder IO"
> 
> dev_id=$(_add_ublk_dev -t null)
> _check_add_dev $TID $?
> @@ -21,15 +21,20 @@ _check_add_dev $TID $?
> dev_t=$(_get_disk_dev_t "$dev_id")
> bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
> btrace_pid=$!
> -sleep 2
> 
> -if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
> +# Wait for bpftrace probes to be attached (BEGIN block prints BPFTRACE_READY)
> +for _ in $(seq 100); do
> + grep -q "BPFTRACE_READY" "$UBLK_TMP" 2>/dev/null && break
> + sleep 0.1
> +done
> +
> +if ! kill -0 "$btrace_pid" 2>/dev/null; then
> _cleanup_test "null"
> exit "$UBLK_SKIP_CODE"
> fi
> 
> -# run fio over this ublk disk
> -fio --name=write_seq \
> +# run fio over this ublk disk (pinned to CPU 0)
> +taskset -c 0 fio --name=write_seq \
>     --filename=/dev/ublkb"${dev_id}" \
>     --ioengine=libaio --iodepth=16 \
>     --rw=write \
> @@ -39,9 +44,13 @@ fio --name=write_seq \
> ERR_CODE=$?
> kill "$btrace_pid"
> wait
> -if grep -q "io_out_of_order" "$UBLK_TMP"; then
> - cat "$UBLK_TMP"
> +
> +# Check for out-of-order completions detected by bpftrace
> +if grep -q "^out_of_order:" "$UBLK_TMP"; then
> + echo "I/O reordering detected:"
> + grep "^out_of_order:" "$UBLK_TMP"
> ERR_CODE=255
> fi
> +
> _cleanup_test "null"
> _show_result $TID $ERR_CODE
> diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt
> index b2f60a92b118..8ebe13d24c67 100644
> --- a/tools/testing/selftests/ublk/trace/seq_io.bt
> +++ b/tools/testing/selftests/ublk/trace/seq_io.bt
> @@ -2,23 +2,64 @@
> $1:  dev_t
> $2:  RWBS
> $3:     strlen($2)
> +
> + Track request order between block_io_start and block_rq_complete.
> + For each request, record its start sequence number and verify
> + completions happen in the same order.
> +
> + Sequence starts at 1 so that 0 means "never seen" (bpftrace maps
> + return 0 for missing keys). On first valid completion, sync
> + complete_seq to handle probe attachment races.
> +
> + block_rq_complete is listed first to ensure it's attached before
> + block_io_start, reducing the window for missed completions.
> */
> +
> BEGIN {
> - @last_rw[$1, str($2)] = (uint64)0;
> + @start_seq = (uint64)1;
> + @complete_seq = (uint64)0;
> + @out_of_order = (uint64)0;
> + /* Initialize map type with dummy entry */
> + @start_order[0] = (uint64)0;
> + delete(@start_order[0]);
> + printf("BPFTRACE_READY\n");
> }
> +
> tracepoint:block:block_rq_complete
> {
> - $dev = $1;
> if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
> - $last = @last_rw[$dev, str($2)];
> - if ((uint64)args.sector != $last) {
> - printf("io_out_of_order: exp %llu actual %llu\n",
> - args.sector, $last);
> + $expected = @start_order[args.sector];
> + if ($expected > 0) {
> + /* Sync complete_seq on first valid completion */
> + if (@complete_seq == 0) {
> + @complete_seq = $expected;
> + }
> +
> + if ($expected != @complete_seq) {
> + printf("out_of_order: sector %llu started at seq %llu but completed at seq %llu\n",
> + args.sector, $expected, @complete_seq);
> + @out_of_order = @out_of_order + 1;
> + }
> + delete(@start_order[args.sector]);
> + @complete_seq = @complete_seq + 1;
> }
> - @last_rw[$dev, str($2)] = (args.sector + args.nr_sector);
> + /* $expected == 0 means we never saw this sector start, skip it */
> + }
> +}
> +
> +tracepoint:block:block_io_start
> +{
> + if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
> + @start_order[args.sector] = @start_seq;
> + @start_seq = @start_seq + 1;
> }
> }
> 
> END {
> - clear(@last_rw);
> + printf("total_start: %llu total_complete: %llu out_of_order: %llu\n",
> + @start_seq - 1, @complete_seq, @out_of_order);
> + clear(@start_order);
> + clear(@start_seq);
> + clear(@complete_seq);
> + clear(@out_of_order);
> }
> 


This solves it - no more out-of-order completions are detected.

It looks like the tracing automount (see the NOTICE in the kernel log below) explains why a few sequence numbers are missed on the first run.

[   99.548551] ublk selftest: generic_01 starting at 2026-01-26T10:24:07+02:00
[  104.173575] NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030
[  112.680519] ublk selftest: generic_01 done at 2026-01-26T10:24:20+02:00
[  116.564281] ublk selftest: generic_01 starting at 2026-01-26T10:24:24+02:00
[  130.658273] ublk selftest: generic_01 done at 2026-01-26T10:24:38+02:00


have fun,
alex
Re: [PATCH] selftests: ublk: io-reorder triggered in test_generic_01.sh
Posted by Ming Lei 1 week, 5 days ago
On Sun, Jan 25, 2026 at 11:28 PM Ming Lei <ming.lei@redhat.com> wrote:
>
...

> >
> > Fedora 43 default config with some debugging options enabled, but no changes in schedulers.
> > Test VM storage is on a networked NAS over iSCSI - both boxes VM host and NAS have two NICs,
> > I get the errors when I load the network. So I believe the requests really complete out of
> > order due to the network in my case. All tests that have the bpftrace check fail on occasion.
>
> Can you test the following patch and see if re-order still can happen?
>
>
> diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
> index 26cf3c7ceeb5..26d5e52ece29 100755
> --- a/tools/testing/selftests/ublk/test_generic_01.sh
> +++ b/tools/testing/selftests/ublk/test_generic_01.sh
> @@ -13,7 +13,7 @@ if ! _have_program fio; then
>         exit "$UBLK_SKIP_CODE"
>  fi
>
> -_prep_test "null" "sequential io order"
> +_prep_test "null" "ublk dispatch won't reorder IO"
>
>  dev_id=$(_add_ublk_dev -t null)
>  _check_add_dev $TID $?
> @@ -39,9 +39,13 @@ fio --name=write_seq \
>  ERR_CODE=$?
>  kill "$btrace_pid"
>  wait
> -if grep -q "io_out_of_order" "$UBLK_TMP"; then
> -       cat "$UBLK_TMP"
> +
> +# Check for out-of-order completions detected by bpftrace
> +if grep -q "^out_of_order:" "$UBLK_TMP"; then
> +       echo "I/O reordering detected:"
> +       grep "^out_of_order:" "$UBLK_TMP"
>         ERR_CODE=255
>  fi
> +
>  _cleanup_test "null"
>  _show_result $TID $ERR_CODE
> diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt
> index b2f60a92b118..60ac40e66606 100644
> --- a/tools/testing/selftests/ublk/trace/seq_io.bt
> +++ b/tools/testing/selftests/ublk/trace/seq_io.bt
> @@ -2,23 +2,45 @@
>         $1:     dev_t
>         $2:     RWBS
>         $3:     strlen($2)
> +
> +       Track request order between block_io_start and block_rq_complete.
> +       For each request, record its start sequence number and verify
> +       completions happen in the same order.
>  */
> +
>  BEGIN {
> -       @last_rw[$1, str($2)] = (uint64)0;
> +       @start_seq = (uint64)0;
> +       @complete_seq = (uint64)0;
> +       @out_of_order = (uint64)0;
> +}
> +
> +tracepoint:block:block_io_start
> +{
> +       if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
> +               @start_order[args.sector] = @start_seq;
> +               @start_seq = @start_seq + 1;
> +       }
>  }
> +
>  tracepoint:block:block_rq_complete
>  {
> -       $dev = $1;
>         if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
> -               $last = @last_rw[$dev, str($2)];
> -               if ((uint64)args.sector != $last) {
> -                       printf("io_out_of_order: exp %llu actual %llu\n",
> -                               args.sector, $last);
> +               $expected_order = @start_order[args.sector];
> +               if ($expected_order != @complete_seq) {
> +                       printf("out_of_order: sector %llu started at seq %llu but completed at seq %llu\n",
> +                               args.sector, $expected_order, @complete_seq);
> +                       @out_of_order = @out_of_order + 1;
>                 }
> -               @last_rw[$dev, str($2)] = (args.sector + args.nr_sector);
> +               delete(@start_order[args.sector]);
> +               @complete_seq = @complete_seq + 1;
>         }
>  }
>
>  END {
> -       clear(@last_rw);
> +       printf("total_start: %llu total_complete: %llu out_of_order: %llu\n",
> +               @start_seq, @complete_seq, @out_of_order);
> +       clear(@start_order);
> +       clear(@start_seq);
> +       clear(@complete_seq);
> +       clear(@out_of_order);
>  }

I forgot another change; please apply the following delta too:

@@ -28,8 +28,8 @@ if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
        exit "$UBLK_SKIP_CODE"
 fi

-# run fio over this ublk disk
-fio --name=write_seq \
+# run fio over this ublk disk (pinned to CPU 0)
+taskset -c 0 fio --name=write_seq \


Thanks,