Add a test case that reproduces
https://issues.redhat.com/browse/RHEL-88175.
When the mirror blockjob completes, it replaces the original vdpa-blk
blockdev node with a new vdpa-blk blockdev. This will only work if the
BlockRAMRegistrar populates memory mappings (see the previous commit).
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
tests/functional/x86_64/meson.build | 1 +
.../functional/x86_64/test_vdpa_blk_mirror.py | 118 ++++++++++++++++++
2 files changed, 119 insertions(+)
create mode 100755 tests/functional/x86_64/test_vdpa_blk_mirror.py
diff --git a/tests/functional/x86_64/meson.build b/tests/functional/x86_64/meson.build
index f78eec5e6c..dfe0e00190 100644
--- a/tests/functional/x86_64/meson.build
+++ b/tests/functional/x86_64/meson.build
@@ -33,6 +33,7 @@ tests_x86_64_system_thorough = [
'replay',
'reverse_debug',
'tuxrun',
+ 'vdpa_blk_mirror',
'vfio_user_client',
'virtio_balloon',
'virtio_gpu',
diff --git a/tests/functional/x86_64/test_vdpa_blk_mirror.py b/tests/functional/x86_64/test_vdpa_blk_mirror.py
new file mode 100755
index 0000000000..7d52836920
--- /dev/null
+++ b/tests/functional/x86_64/test_vdpa_blk_mirror.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright Red Hat, Inc.
+#
+# vdpa-blk mirror blockjob tests
+
+
+import glob
+import os
+import subprocess
+from qemu_test import LinuxKernelTest, Asset
+from qemu_test import exec_command_and_wait_for_pattern
+
+
+def run(cmd: str) -> None:
+ '''
+ Run a shell command without capturing stdout/stderr and raise
+ subprocess.CalledProcessError on failure.
+ '''
+ subprocess.check_call(cmd, shell=True,
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.DEVNULL)
+
+
+class VdpaBlk(LinuxKernelTest):
+
+ KERNEL_COMMAND_LINE = 'printk.time=0 console=ttyS0 rd.rescue'
+ ASSET_KERNEL = Asset(
+ ('https://archives.fedoraproject.org/pub/archive/fedora/linux/releases'
+ '/31/Server/x86_64/os/images/pxeboot/vmlinuz'),
+ 'd4738d03dbbe083ca610d0821d0a8f1488bebbdccef54ce33e3adb35fda00129')
+ ASSET_INITRD = Asset(
+ ('https://archives.fedoraproject.org/pub/archive/fedora/linux/releases'
+ '/31/Server/x86_64/os/images/pxeboot/initrd.img'),
+ '277cd6c7adf77c7e63d73bbb2cded8ef9e2d3a2f100000e92ff1f8396513cd8b')
+ VDPA_DEV_1 = f'vdpa-{os.getpid()}-1'
+ VDPA_DEV_2 = f'vdpa-{os.getpid()}-2'
+
+ def setUp(self) -> None:
+ def create_vdpa_dev(name):
+ '''
+ Create a new vdpasim_blk device and return its vhost_vdpa device
+ path.
+ '''
+ run(f'sudo -n vdpa dev add mgmtdev vdpasim_blk name {name}')
+ sysfs_vhost_vdpa_dev_dir = \
+ glob.glob(f'/sys/bus/vdpa/devices/{name}/vhost-vdpa-*')[0]
+ vhost_dev_basename = os.path.basename(sysfs_vhost_vdpa_dev_dir)
+ vhost_dev_path = f'/dev/{vhost_dev_basename}'
+ run(f'sudo -n chown {os.getuid()}:{os.getgid()} {vhost_dev_path}')
+ return vhost_dev_path
+
+ try:
+ run('sudo -n modprobe vhost_vdpa')
+ run('sudo -n modprobe vdpa_sim_blk')
+
+ self.vhost_dev_1_path = create_vdpa_dev(self.VDPA_DEV_1)
+ self.vhost_dev_2_path = create_vdpa_dev(self.VDPA_DEV_2)
+ except subprocess.CalledProcessError:
+ self.skipTest('Failed to set up vdpa_blk device')
+
+ super().setUp()
+
+ def tearDown(self) -> None:
+ super().tearDown()
+
+ try:
+ run(f'sudo -n vdpa dev del {self.VDPA_DEV_2}')
+ run(f'sudo -n vdpa dev del {self.VDPA_DEV_1}')
+ run('sudo -n modprobe --remove vdpa_sim_blk')
+ run('sudo -n modprobe --remove vhost_vdpa')
+ except subprocess.CalledProcessError:
+ pass # ignore failures
+
+ def test_mirror(self) -> None:
+ '''
+ Check that I/O works after a mirror blockjob pivots. See
+ https://issues.redhat.com/browse/RHEL-88175.
+ '''
+ kernel_path = self.ASSET_KERNEL.fetch()
+ initrd_path = self.ASSET_INITRD.fetch()
+
+ self.vm.add_args('-m', '1G')
+ self.vm.add_args('-object', 'memory-backend-memfd,id=mem,size=1G')
+ self.vm.add_args('-machine', 'pc,accel=kvm:tcg,memory-backend=mem')
+ self.vm.add_args('-append', self.KERNEL_COMMAND_LINE)
+ self.vm.add_args('-blockdev',
+ 'virtio-blk-vhost-vdpa,node-name=vdpa-blk-0,' +
+ f'path={self.vhost_dev_1_path},cache.direct=on')
+ self.vm.add_args('-device', 'virtio-blk-pci,drive=vdpa-blk-0')
+
+ self.launch_kernel(kernel_path, initrd_path,
+ wait_for='# ')
+
+ self.vm.cmd('blockdev-add',
+ driver='virtio-blk-vhost-vdpa',
+ node_name='vdpa-blk-1',
+ path=self.vhost_dev_2_path,
+ cache={'direct': True})
+ self.vm.cmd('blockdev-mirror',
+ device='vdpa-blk-0',
+ job_id='mirror0',
+ target='vdpa-blk-1',
+ sync='full',
+ target_is_zero=True)
+ self.vm.event_wait('BLOCK_JOB_READY')
+ self.vm.cmd('block-job-complete',
+ device='mirror0')
+
+ exec_command_and_wait_for_pattern(self,
+ 'dd if=/dev/vda of=/dev/null iflag=direct bs=4k count=1',
+ '4096 bytes (4.1 kB, 4.0 KiB) copied')
+
+
+if __name__ == '__main__':
+ LinuxKernelTest.main()
--
2.51.0
On Tue, Oct 07, 2025 at 02:34:47PM -0400, Stefan Hajnoczi wrote:
> Add a test case that reproduces
> https://issues.redhat.com/browse/RHEL-88175.
>
> When the mirror blockjob completes, it replaces the original vdpa-blk
> blockdev node with a new vdpa-blk blockdev. This will only work if the
> BlockRAMRegistrar populates memory mappings (see the previous commit).
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> tests/functional/x86_64/meson.build | 1 +
> .../functional/x86_64/test_vdpa_blk_mirror.py | 118 ++++++++++++++++++
> 2 files changed, 119 insertions(+)
> create mode 100755 tests/functional/x86_64/test_vdpa_blk_mirror.py
I tried running this test to see how it fared on my system. With the
entire series applied (and after I set up passwordless sudo, so that
the test runs instead of being skipped), the test passed. I then
reverted the rest of the series, and the test hung instead of timing
out or printing an error message about a specific failure. Then I
reapplied the rest of the series, but now the test hangs because
whatever state the system was left in after the earlier failed run is
preventing the re-run from starting. The hang also appears to be
uninterruptible: SIGINT does not end the hung test, and even ctrl-z
does not let me move the test into the background; I had to resort to
kill -9 from another terminal.
I'm less familiar with functional tests in general, but it might be
nice to figure out a way to quickly report failure when testing
without the rest of the series, rather than leaving the system in a
wedged state.
Of course, since CI will never be running the test without the rest of
the series in place, that is not a show-stopper for accepting this
series as-is. I'm not even sure if adding a timeout to the dd command
[1] would help in tearing down the vdpa_sim_blk device on a test
failure.
And it may not even be something that QEMU can do anything about - the
whole point of the rest of the series is to ensure that vdpa still has
memory mappings after migration so that it can complete I/O. Without
the rest of the series, the test is correctly proving that the
migration lost the mappings and thus I/O can't complete, even if I have
no idea how to force the kernel to relinquish the device when we know
that the I/O won't ever happen.
So, I'm fine if you add:
Tested-by: Eric Blake <eblake@redhat.com>
even though I'm not comfortable with a Reviewed-by at this time.
> diff --git a/tests/functional/x86_64/test_vdpa_blk_mirror.py b/tests/functional/x86_64/test_vdpa_blk_mirror.py
> +class VdpaBlk(LinuxKernelTest):
> +
> + KERNEL_COMMAND_LINE = 'printk.time=0 console=ttyS0 rd.rescue'
> + ASSET_KERNEL = Asset(
> + ('https://archives.fedoraproject.org/pub/archive/fedora/linux/releases'
> + '/31/Server/x86_64/os/images/pxeboot/vmlinuz'),
> + 'd4738d03dbbe083ca610d0821d0a8f1488bebbdccef54ce33e3adb35fda00129')
> + ASSET_INITRD = Asset(
> + ('https://archives.fedoraproject.org/pub/archive/fedora/linux/releases'
> + '/31/Server/x86_64/os/images/pxeboot/initrd.img'),
> + '277cd6c7adf77c7e63d73bbb2cded8ef9e2d3a2f100000e92ff1f8396513cd8b')
> + VDPA_DEV_1 = f'vdpa-{os.getpid()}-1'
> + VDPA_DEV_2 = f'vdpa-{os.getpid()}-2'
> +
> + def setUp(self) -> None:
> + def create_vdpa_dev(name):
> + '''
> + Create a new vdpasim_blk device and return its vhost_vdpa device
> + path.
> + '''
> + run(f'sudo -n vdpa dev add mgmtdev vdpasim_blk name {name}')
> + sysfs_vhost_vdpa_dev_dir = \
> + glob.glob(f'/sys/bus/vdpa/devices/{name}/vhost-vdpa-*')[0]
> + vhost_dev_basename = os.path.basename(sysfs_vhost_vdpa_dev_dir)
> + vhost_dev_path = f'/dev/{vhost_dev_basename}'
> + run(f'sudo -n chown {os.getuid()}:{os.getgid()} {vhost_dev_path}')
> + return vhost_dev_path
> +
> + try:
> + run('sudo -n modprobe vhost_vdpa')
> + run('sudo -n modprobe vdpa_sim_blk')
After I killed the hung test with kill -9, manually running this line
fails with:
$ sudo -n modprobe vdpa_sim_blk
modprobe: ERROR: could not insert 'vdpa_sim_blk': Device or resource busy
That is my evidence that something really did get wedged while trying
to clean up after the hang. Even
$ sudo -n vdpa dev show
hangs with no output, does not respond to ctrl-c or ctrl-z, and requires
kill -9. When run under strace, the trace ends at:
...
socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC) = 3
setsockopt(3, SOL_NETLINK, NETLINK_CAP_ACK, [1], 4) = 0
setsockopt(3, SOL_NETLINK, NETLINK_EXT_ACK, [1], 4) = 0
bind(3, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, nl_pid=3694229, nl_groups=00000000}, [12]) = 0
> +
> + self.vhost_dev_1_path = create_vdpa_dev(self.VDPA_DEV_1)
> + self.vhost_dev_2_path = create_vdpa_dev(self.VDPA_DEV_2)
> + except subprocess.CalledProcessError:
> + self.skipTest('Failed to set up vdpa_blk device')
> +
> + super().setUp()
> +
> + def tearDown(self) -> None:
> + super().tearDown()
> +
> + try:
> + run(f'sudo -n vdpa dev del {self.VDPA_DEV_2}')
> + run(f'sudo -n vdpa dev del {self.VDPA_DEV_1}')
> + run('sudo -n modprobe --remove vdpa_sim_blk')
> + run('sudo -n modprobe --remove vhost_vdpa')
so these cleanups are not happening because of whatever else is already
wedged.
> + except subprocess.CalledProcessError:
> + pass # ignore failures
> +
> + def test_mirror(self) -> None:
> + '''
> + Check that I/O works after a mirror blockjob pivots. See
> + https://issues.redhat.com/browse/RHEL-88175.
> + '''
> + kernel_path = self.ASSET_KERNEL.fetch()
> + initrd_path = self.ASSET_INITRD.fetch()
> +
> + self.vm.add_args('-m', '1G')
> + self.vm.add_args('-object', 'memory-backend-memfd,id=mem,size=1G')
> + self.vm.add_args('-machine', 'pc,accel=kvm:tcg,memory-backend=mem')
> + self.vm.add_args('-append', self.KERNEL_COMMAND_LINE)
> + self.vm.add_args('-blockdev',
> + 'virtio-blk-vhost-vdpa,node-name=vdpa-blk-0,' +
> + f'path={self.vhost_dev_1_path},cache.direct=on')
> + self.vm.add_args('-device', 'virtio-blk-pci,drive=vdpa-blk-0')
> +
> + self.launch_kernel(kernel_path, initrd_path,
> + wait_for='# ')
> +
> + self.vm.cmd('blockdev-add',
> + driver='virtio-blk-vhost-vdpa',
> + node_name='vdpa-blk-1',
> + path=self.vhost_dev_2_path,
> + cache={'direct': True})
> + self.vm.cmd('blockdev-mirror',
> + device='vdpa-blk-0',
> + job_id='mirror0',
> + target='vdpa-blk-1',
> + sync='full',
> + target_is_zero=True)
> + self.vm.event_wait('BLOCK_JOB_READY')
> + self.vm.cmd('block-job-complete',
> + device='mirror0')
> +
> + exec_command_and_wait_for_pattern(self,
> + 'dd if=/dev/vda of=/dev/null iflag=direct bs=4k count=1',
[1] This might be the spot where adding a timeout command would help
the guest relinquish control of the block device, but it is still not
obvious to me whether that would also be enough for the test to fail
cleanly and allow a clean restart.
> + '4096 bytes (4.1 kB, 4.0 KiB) copied')
> +
> +
> +if __name__ == '__main__':
> + LinuxKernelTest.main()
> --
> 2.51.0
>
--
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization: qemu.org | libguestfs.org
© 2016 - 2026 Red Hat, Inc.