The base node of a block-stream operation indicates the first image
from the backing chain starting from which no data is copied to the
top node.
The block-stream job allows others to use that base image, so a second
block-stream job could be writing to it at the same time. An important
restriction is that the base image must not disappear while the stream
job is ongoing. stream_start() freezes the backing chain from top to
base with that purpose but it does it too late in the code so there is
a race condition there.
This bug was fixed in the previous commit, and this patch contains an
iotest for this scenario.
Signed-off-by: Alberto Garcia <berto@igalia.com>
---
tests/qemu-iotests/030 | 32 ++++++++++++++++++++++++++++++++
tests/qemu-iotests/030.out | 4 ++--
2 files changed, 34 insertions(+), 2 deletions(-)
diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 276e06b5ba..184a59d465 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -314,6 +314,38 @@ class TestParallelOps(iotests.QMPTestCase):
self.wait_until_completed(drive='commit-drive0')
+ # In this case the base node of the stream job is the same as the
+ # top node of commit job. Since block-commit removes the top node
+ # when it finishes, this is not allowed.
+ def test_overlapping_4(self):
+ self.assert_no_active_block_jobs()
+
+ # Commit from node2 into node0
+ result = self.vm.qmp('block-commit', device='drive0', top=self.imgs[2], base=self.imgs[0])
+ self.assert_qmp(result, 'return', {})
+
+ # Stream from node2 into node4
+ result = self.vm.qmp('block-stream', device='node4', base_node='node2', job_id='node4')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ # Wait for all jobs to be finished.
+ pending_jobs = ['drive0']
+ while len(pending_jobs) > 0:
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'BLOCK_JOB_COMPLETED':
+ node_name = self.dictpath(event, 'data/device')
+ self.assertTrue(node_name in pending_jobs)
+ self.assert_qmp_absent(event, 'data/error')
+ pending_jobs.remove(node_name)
+ if event['event'] == 'BLOCK_JOB_READY':
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/type', 'commit')
+ self.assert_qmp_absent(event, 'data/error')
+ self.assertTrue('drive0' in pending_jobs)
+ self.vm.qmp('block-job-complete', device='drive0')
+
+ self.assert_no_active_block_jobs()
+
# Test a block-stream and a block-commit job in parallel
# Here the stream job is supposed to finish quickly in order to reproduce
# the scenario that triggers the bug fixed in 3d5d319e1221 and 1a63a907507
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index 42314e9c00..4fd1c2dcd2 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-........................
+.........................
----------------------------------------------------------------------
-Ran 24 tests
+Ran 25 tests
OK
--
2.11.0
26.03.2019 20:07, Alberto Garcia wrote:
> The base node of a block-stream operation indicates the first image
> from the backing chain starting from which no data is copied to the
> top node.
>
> The block-stream job allows others to use that base image, so a second
> block-stream job could be writing to it at the same time. An important
> restriction is that the base image must not disappear while the stream
> job is ongoing. stream_start() freezes the backing chain from top to
> base with that purpose but it does it too late in the code so there is
> a race condition there.
>
> This bug was fixed in the previous commit, and this patch contains an
> iotest for this scenario.
>
> Signed-off-by: Alberto Garcia <berto@igalia.com>
> ---
> tests/qemu-iotests/030 | 32 ++++++++++++++++++++++++++++++++
> tests/qemu-iotests/030.out | 4 ++--
> 2 files changed, 34 insertions(+), 2 deletions(-)
>
> diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
> index 276e06b5ba..184a59d465 100755
> --- a/tests/qemu-iotests/030
> +++ b/tests/qemu-iotests/030
> @@ -314,6 +314,38 @@ class TestParallelOps(iotests.QMPTestCase):
>
> self.wait_until_completed(drive='commit-drive0')
>
> + # In this case the base node of the stream job is the same as the
> + # top node of commit job. Since block-commit removes the top node
> + # when it finishes, this is not allowed.
> + def test_overlapping_4(self):
> + self.assert_no_active_block_jobs()
> +
> + # Commit from node2 into node0
> + result = self.vm.qmp('block-commit', device='drive0', top=self.imgs[2], base=self.imgs[0])
> + self.assert_qmp(result, 'return', {})
> +
> + # Stream from node2 into node4
> + result = self.vm.qmp('block-stream', device='node4', base_node='node2', job_id='node4')
> + self.assert_qmp(result, 'error/class', 'GenericError')
> +
> + # Wait for all jobs to be finished.
What about just using
self.cancel_and_wait() or self.wait_until_completed() here?
> + pending_jobs = ['drive0']
> + while len(pending_jobs) > 0:
> + for event in self.vm.get_qmp_events(wait=True):
> + if event['event'] == 'BLOCK_JOB_COMPLETED':
> + node_name = self.dictpath(event, 'data/device')
> + self.assertTrue(node_name in pending_jobs)
> + self.assert_qmp_absent(event, 'data/error')
> + pending_jobs.remove(node_name)
> + if event['event'] == 'BLOCK_JOB_READY':
> + self.assert_qmp(event, 'data/device', 'drive0')
> + self.assert_qmp(event, 'data/type', 'commit')
> + self.assert_qmp_absent(event, 'data/error')
> + self.assertTrue('drive0' in pending_jobs)
> + self.vm.qmp('block-job-complete', device='drive0')
> +
> + self.assert_no_active_block_jobs()
> +
> # Test a block-stream and a block-commit job in parallel
> # Here the stream job is supposed to finish quickly in order to reproduce
> # the scenario that triggers the bug fixed in 3d5d319e1221 and 1a63a907507
> diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
> index 42314e9c00..4fd1c2dcd2 100644
> --- a/tests/qemu-iotests/030.out
> +++ b/tests/qemu-iotests/030.out
> @@ -1,5 +1,5 @@
> -........................
> +.........................
> ----------------------------------------------------------------------
> -Ran 24 tests
> +Ran 25 tests
>
> OK
>
--
Best regards,
Vladimir
On Thu 28 Mar 2019 11:17:48 AM CET, Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> wrote:
>> +        # Wait for all jobs to be finished.
>
> What about just using
> self.cancel_and_wait() or self.wait_until_completed() here?

You're right, wait_until_completed() should be enough; a commit on an
intermediate node does not use BLOCK_JOB_READY. I'll resend.

Berto
© 2016 - 2026 Red Hat, Inc.