include/hw/virtio/virtio-gpu.h | 4 +-- hw/display/virtio-gpu.c | 60 ++++++++++++++++++++---------------------- 2 files changed, 30 insertions(+), 34 deletions(-)
This fixes a deadlock I previously observed with the test in [1].
However, I can no longer reproduce the issue reliably with that test, so
I used Codex, a coding agent, to write a more reliable local test case,
shown below. I applied to Codex for Open Source to get access. The test
case is not intended for merge: current policy prohibits that, and it is
probably not worth carrying anyway because race-condition tests are
inherently fragile. The remaining patches were written by me.
[1] https://lore.kernel.org/qemu-devel/20251014111234.3190346-9-alex.bennee@linaro.org/
To: qemu-devel@nongnu.org
Cc: Alex Bennée <alex.bennee@linaro.org>
Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
Below is the Codex-written test case:
diff --git a/tests/functional/aarch64/test_gpu_blob.py b/tests/functional/aarch64/test_gpu_blob.py
index a913d3b29c84..52627b4541f9 100755
--- a/tests/functional/aarch64/test_gpu_blob.py
+++ b/tests/functional/aarch64/test_gpu_blob.py
@@ -13,7 +13,9 @@
#
# SPDX-License-Identifier: GPL-2.0-or-later
-from qemu.machine.machine import VMLaunchFailure
+import subprocess
+
+from qemu.machine.machine import AbnormalShutdown, VMLaunchFailure
from qemu_test import Asset
from qemu_test import wait_for_console_pattern
@@ -25,8 +27,7 @@ class Aarch64VirtBlobTest(LinuxKernelTest):
'download?path=%2Fblob-test&files=qemu-880.bin',
'2f6ab85d0b156c94fcedd2c4c821c5cbd52925a2de107f8e2d569ea2e34e42eb')
- def test_virtio_gpu_blob(self):
-
+ def launch_blob_test(self):
self.set_machine('virt')
self.require_accelerator("tcg")
@@ -65,9 +66,27 @@ def test_virtio_gpu_blob(self):
self.log.info("unhandled launch failure: %s", excp.output)
raise excp
+ def test_virtio_gpu_blob(self):
+ self.launch_blob_test()
+
self.wait_for_console_pattern('[INFO] virtio-gpu test finished')
# the test should cleanly exit
+ def test_virtio_gpu_blob_shutdown_race(self):
+ self.launch_blob_test()
+
+ self.wait_for_console_pattern('[INFO] unmapping blob object resource')
+
+ try:
+ self.vm.shutdown(timeout=10)
+ except AbnormalShutdown as excp:
+ if isinstance(excp.__cause__, subprocess.TimeoutExpired):
+ raise AssertionError(
+ "QEMU failed to exit while virtio-gpu reset was racing "
+ "with shutdown") from excp
+ self.log.info("QEMU exited before the shutdown request completed: %s",
+ excp)
+
if __name__ == '__main__':
LinuxKernelTest.main()
---
Changes in v2:
- Added the patch "virtio-gpu: Run reset cleanup in the same BH".
- My assumption about the ordering was incorrect, so I changed the patch
to follow the approach used by virtio-gpu-gl.
- Link to v1: https://lore.kernel.org/qemu-devel/20251029-gpu-v1-1-e3e3c7eebc9e@rsg.ci.i.u-tokyo.ac.jp
---
Akihiko Odaki (2):
virtio-gpu: Run reset cleanup in the same BH
virtio-gpu: Do not wait for the main thread during reset
include/hw/virtio/virtio-gpu.h | 4 +--
hw/display/virtio-gpu.c | 60 ++++++++++++++++++++----------------------
2 files changed, 30 insertions(+), 34 deletions(-)
---
base-commit: 14f38a63b9adc02c0ebe3b5ada1f1208abaf21ea
change-id: 20251029-gpu-c3f45747f7ba
Best regards,
--
Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> writes:
> This fixes a deadlock I previously observed with the test in [1].
>
> However, I can no longer reproduce the issue reliably with that test, so
> I used Codex, a coding agent, to write a more reliable local test case,
> shown below. I applied to Codex for Open Source to get access. The test
> case is not intended for merge: current policy prohibits that, and it is
> probably not worth carrying anyway because race-condition tests are
> inherently fragile.
What sort of hit rate where you getting with the race? So far they have
both been rock solid without the additional patches for me.
> The remaining patches were written by me.
>
> [1] https://lore.kernel.org/qemu-devel/20251014111234.3190346-9-alex.bennee@linaro.org/
>
> To: qemu-devel@nongnu.org
> Cc: Alex Bennée <alex.bennee@linaro.org>
> Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
>
> Below is the Codex-written test case:
>
> diff --git a/tests/functional/aarch64/test_gpu_blob.py b/tests/functional/aarch64/test_gpu_blob.py
> index a913d3b29c84..52627b4541f9 100755
> --- a/tests/functional/aarch64/test_gpu_blob.py
> +++ b/tests/functional/aarch64/test_gpu_blob.py
> @@ -13,7 +13,9 @@
> #
> # SPDX-License-Identifier: GPL-2.0-or-later
>
> -from qemu.machine.machine import VMLaunchFailure
> +import subprocess
> +
> +from qemu.machine.machine import AbnormalShutdown, VMLaunchFailure
>
> from qemu_test import Asset
> from qemu_test import wait_for_console_pattern
> @@ -25,8 +27,7 @@ class Aarch64VirtBlobTest(LinuxKernelTest):
> 'download?path=%2Fblob-test&files=qemu-880.bin',
> '2f6ab85d0b156c94fcedd2c4c821c5cbd52925a2de107f8e2d569ea2e34e42eb')
>
> - def test_virtio_gpu_blob(self):
> -
> + def launch_blob_test(self):
> self.set_machine('virt')
> self.require_accelerator("tcg")
>
> @@ -65,9 +66,27 @@ def test_virtio_gpu_blob(self):
> self.log.info("unhandled launch failure: %s", excp.output)
> raise excp
>
> + def test_virtio_gpu_blob(self):
> + self.launch_blob_test()
> +
> self.wait_for_console_pattern('[INFO] virtio-gpu test finished')
> # the test should cleanly exit
>
> + def test_virtio_gpu_blob_shutdown_race(self):
> + self.launch_blob_test()
> +
> + self.wait_for_console_pattern('[INFO] unmapping blob object resource')
> +
> + try:
> + self.vm.shutdown(timeout=10)
> + except AbnormalShutdown as excp:
> + if isinstance(excp.__cause__, subprocess.TimeoutExpired):
> + raise AssertionError(
> + "QEMU failed to exit while virtio-gpu reset was racing "
> + "with shutdown") from excp
> + self.log.info("QEMU exited before the shutdown request completed: %s",
> + excp)
> +
>
> if __name__ == '__main__':
> LinuxKernelTest.main()
>
> ---
> Changes in v2:
> - Added the patch "virtio-gpu: Run reset cleanup in the same BH".
> - My assumption about the ordering was incorrect, so I changed the patch
> to follow the approach used by virtio-gpu-gl.
> - Link to v1: https://lore.kernel.org/qemu-devel/20251029-gpu-v1-1-e3e3c7eebc9e@rsg.ci.i.u-tokyo.ac.jp
>
> ---
> Akihiko Odaki (2):
> virtio-gpu: Run reset cleanup in the same BH
> virtio-gpu: Do not wait for the main thread during reset
>
> include/hw/virtio/virtio-gpu.h | 4 +--
> hw/display/virtio-gpu.c | 60 ++++++++++++++++++++----------------------
> 2 files changed, 30 insertions(+), 34 deletions(-)
> ---
> base-commit: 14f38a63b9adc02c0ebe3b5ada1f1208abaf21ea
> change-id: 20251029-gpu-c3f45747f7ba
>
> Best regards,
--
Alex Bennée
Virtualisation Tech Lead @ Linaro
On 2026/05/19 4:35, Alex Bennée wrote:
> Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> writes:
>
>> This fixes a deadlock I previously observed with the test in [1].
>>
>> However, I can no longer reproduce the issue reliably with that test, so
>> I used Codex, a coding agent, to write a more reliable local test case,
>> shown below. I applied to Codex for Open Source to get access. The test
>> case is not intended for merge: current policy prohibits that, and it is
>> probably not worth carrying anyway because race-condition tests are
>> inherently fragile.
>
> What sort of hit rate where you getting with the race? So far they have
> both been rock solid without the additional patches for me.
I hit the deadlock in 8 out of 10 trials.
>
>
>> The remaining patches were written by me.
>>
>> [1] https://lore.kernel.org/qemu-devel/20251014111234.3190346-9-alex.bennee@linaro.org/
>>
>> To: qemu-devel@nongnu.org
>> Cc: Alex Bennée <alex.bennee@linaro.org>
>> Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
>> Cc: Michael S. Tsirkin <mst@redhat.com>
>> Signed-off-by: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
>>
>> Below is the Codex-written test case:
>>
>> diff --git a/tests/functional/aarch64/test_gpu_blob.py b/tests/functional/aarch64/test_gpu_blob.py
>> index a913d3b29c84..52627b4541f9 100755
>> --- a/tests/functional/aarch64/test_gpu_blob.py
>> +++ b/tests/functional/aarch64/test_gpu_blob.py
>> @@ -13,7 +13,9 @@
>> #
>> # SPDX-License-Identifier: GPL-2.0-or-later
>>
>> -from qemu.machine.machine import VMLaunchFailure
>> +import subprocess
>> +
>> +from qemu.machine.machine import AbnormalShutdown, VMLaunchFailure
>>
>> from qemu_test import Asset
>> from qemu_test import wait_for_console_pattern
>> @@ -25,8 +27,7 @@ class Aarch64VirtBlobTest(LinuxKernelTest):
>> 'download?path=%2Fblob-test&files=qemu-880.bin',
>> '2f6ab85d0b156c94fcedd2c4c821c5cbd52925a2de107f8e2d569ea2e34e42eb')
>>
>> - def test_virtio_gpu_blob(self):
>> -
>> + def launch_blob_test(self):
>> self.set_machine('virt')
>> self.require_accelerator("tcg")
>>
>> @@ -65,9 +66,27 @@ def test_virtio_gpu_blob(self):
>> self.log.info("unhandled launch failure: %s", excp.output)
>> raise excp
>>
>> + def test_virtio_gpu_blob(self):
>> + self.launch_blob_test()
>> +
>> self.wait_for_console_pattern('[INFO] virtio-gpu test finished')
>> # the test should cleanly exit
>>
>> + def test_virtio_gpu_blob_shutdown_race(self):
>> + self.launch_blob_test()
>> +
>> + self.wait_for_console_pattern('[INFO] unmapping blob object resource')
>> +
>> + try:
>> + self.vm.shutdown(timeout=10)
>> + except AbnormalShutdown as excp:
>> + if isinstance(excp.__cause__, subprocess.TimeoutExpired):
>> + raise AssertionError(
>> + "QEMU failed to exit while virtio-gpu reset was racing "
>> + "with shutdown") from excp
>> + self.log.info("QEMU exited before the shutdown request completed: %s",
>> + excp)
>> +
>>
>> if __name__ == '__main__':
>> LinuxKernelTest.main()
>>
>> ---
>> Changes in v2:
>> - Added the patch "virtio-gpu: Run reset cleanup in the same BH".
>> - My assumption about the ordering was incorrect, so I changed the patch
>> to follow the approach used by virtio-gpu-gl.
>> - Link to v1: https://lore.kernel.org/qemu-devel/20251029-gpu-v1-1-e3e3c7eebc9e@rsg.ci.i.u-tokyo.ac.jp
>>
>> ---
>> Akihiko Odaki (2):
>> virtio-gpu: Run reset cleanup in the same BH
>> virtio-gpu: Do not wait for the main thread during reset
>>
>> include/hw/virtio/virtio-gpu.h | 4 +--
>> hw/display/virtio-gpu.c | 60 ++++++++++++++++++++----------------------
>> 2 files changed, 30 insertions(+), 34 deletions(-)
>> ---
>> base-commit: 14f38a63b9adc02c0ebe3b5ada1f1208abaf21ea
>> change-id: 20251029-gpu-c3f45747f7ba
>>
>> Best regards,
>
Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> writes:
> On 2026/05/19 4:35, Alex Bennée wrote:
>> Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> writes:
>>
>>> This fixes a deadlock I previously observed with the test in [1].
>>>
>>> However, I can no longer reproduce the issue reliably with that test, so
>>> I used Codex, a coding agent, to write a more reliable local test case,
>>> shown below. I applied to Codex for Open Source to get access. The test
>>> case is not intended for merge: current policy prohibits that, and it is
>>> probably not worth carrying anyway because race-condition tests are
>>> inherently fragile.
>> What sort of hit rate where you getting with the race? So far they
>> have
>> both been rock solid without the additional patches for me.
>
> I hit the deadlock in 8 out of 10 trials.
It's taking a lot longer on my system (~ 1 in 100) but with these
patches I'm still seeing a hang, it just takes a lot longer to get
there.
>
>>
>>> The remaining patches were written by me.
>>>
>>> [1] https://lore.kernel.org/qemu-devel/20251014111234.3190346-9-alex.bennee@linaro.org/
>>>
>>> To: qemu-devel@nongnu.org
>>> Cc: Alex Bennée <alex.bennee@linaro.org>
>>> Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
>>> Cc: Michael S. Tsirkin <mst@redhat.com>
>>> Signed-off-by: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
>>>
>>> Below is the Codex-written test case:
>>>
>>> diff --git a/tests/functional/aarch64/test_gpu_blob.py b/tests/functional/aarch64/test_gpu_blob.py
>>> index a913d3b29c84..52627b4541f9 100755
>>> --- a/tests/functional/aarch64/test_gpu_blob.py
>>> +++ b/tests/functional/aarch64/test_gpu_blob.py
>>> @@ -13,7 +13,9 @@
>>> #
>>> # SPDX-License-Identifier: GPL-2.0-or-later
>>> -from qemu.machine.machine import VMLaunchFailure
>>> +import subprocess
>>> +
>>> +from qemu.machine.machine import AbnormalShutdown, VMLaunchFailure
>>> from qemu_test import Asset
>>> from qemu_test import wait_for_console_pattern
>>> @@ -25,8 +27,7 @@ class Aarch64VirtBlobTest(LinuxKernelTest):
>>> 'download?path=%2Fblob-test&files=qemu-880.bin',
>>> '2f6ab85d0b156c94fcedd2c4c821c5cbd52925a2de107f8e2d569ea2e34e42eb')
>>> - def test_virtio_gpu_blob(self):
>>> -
>>> + def launch_blob_test(self):
>>> self.set_machine('virt')
>>> self.require_accelerator("tcg")
>>> @@ -65,9 +66,27 @@ def test_virtio_gpu_blob(self):
>>> self.log.info("unhandled launch failure: %s", excp.output)
>>> raise excp
>>> + def test_virtio_gpu_blob(self):
>>> + self.launch_blob_test()
>>> +
>>> self.wait_for_console_pattern('[INFO] virtio-gpu test finished')
>>> # the test should cleanly exit
>>> + def test_virtio_gpu_blob_shutdown_race(self):
>>> + self.launch_blob_test()
>>> +
>>> + self.wait_for_console_pattern('[INFO] unmapping blob object resource')
>>> +
>>> + try:
>>> + self.vm.shutdown(timeout=10)
>>> + except AbnormalShutdown as excp:
>>> + if isinstance(excp.__cause__, subprocess.TimeoutExpired):
>>> + raise AssertionError(
>>> + "QEMU failed to exit while virtio-gpu reset was racing "
>>> + "with shutdown") from excp
>>> + self.log.info("QEMU exited before the shutdown request completed: %s",
>>> + excp)
>>> +
>>> if __name__ == '__main__':
>>> LinuxKernelTest.main()
>>>
>>> ---
>>> Changes in v2:
>>> - Added the patch "virtio-gpu: Run reset cleanup in the same BH".
>>> - My assumption about the ordering was incorrect, so I changed the patch
>>> to follow the approach used by virtio-gpu-gl.
>>> - Link to v1: https://lore.kernel.org/qemu-devel/20251029-gpu-v1-1-e3e3c7eebc9e@rsg.ci.i.u-tokyo.ac.jp
>>>
>>> ---
>>> Akihiko Odaki (2):
>>> virtio-gpu: Run reset cleanup in the same BH
>>> virtio-gpu: Do not wait for the main thread during reset
>>>
>>> include/hw/virtio/virtio-gpu.h | 4 +--
>>> hw/display/virtio-gpu.c | 60 ++++++++++++++++++++----------------------
>>> 2 files changed, 30 insertions(+), 34 deletions(-)
>>> ---
>>> base-commit: 14f38a63b9adc02c0ebe3b5ada1f1208abaf21ea
>>> change-id: 20251029-gpu-c3f45747f7ba
>>>
>>> Best regards,
>>
--
Alex Bennée
Virtualisation Tech Lead @ Linaro
Alex Bennée <alex.bennee@linaro.org> writes:
> Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> writes:
>
>> On 2026/05/19 4:35, Alex Bennée wrote:
>>> Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> writes:
>>>
>>>> This fixes a deadlock I previously observed with the test in [1].
>>>>
>>>> However, I can no longer reproduce the issue reliably with that test, so
>>>> I used Codex, a coding agent, to write a more reliable local test case,
>>>> shown below. I applied to Codex for Open Source to get access. The test
>>>> case is not intended for merge: current policy prohibits that, and it is
>>>> probably not worth carrying anyway because race-condition tests are
>>>> inherently fragile.
>>> What sort of hit rate where you getting with the race? So far they
>>> have
>>> both been rock solid without the additional patches for me.
>>
>> I hit the deadlock in 8 out of 10 trials.
>
> It's taking a lot longer on my system (~ 1 in 100) but with these
> patches I'm still seeing a hang, it just takes a lot longer to get
> there.
tsan shows:
[INFO] mapping blob object resource
[INFO] resource_map_blob response is CtrlHeader { hdr_type: Command(4358), flags: 0, fence_id: 0, ctx_id: 0, _padding: 0 }
[INFO] unmapping blob object resource
==================
WARNING: ThreadSanitizer: data race (pid=3564641)
Write of size 8 at 0x55c8ce6d4250 by thread T1 (mutexes: write M0, write M1):
#0 qemu_ram_free <null> (qemu-system-aarch64+0x98f863) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#1 memory_region_destructor_ram <null> (qemu-system-aarch64+0x977046) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#2 memory_region_finalize <null> (qemu-system-aarch64+0x9830e5) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#3 object_unref <null> (qemu-system-aarch64+0xfa741c) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#4 object_finalize_child_property <null> (qemu-system-aarch64+0xfa765f) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#5 object_unref <null> (qemu-system-aarch64+0xfa73d6) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#6 flatview_destroy <null> (qemu-system-aarch64+0x978e7d) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#7 call_rcu_thread <null> (qemu-system-aarch64+0x122e268) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#8 qemu_thread_start <null> (qemu-system-aarch64+0x121cc8d) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
Previous atomic read of size 8 at 0x55c8ce6d4250 by thread T7:
#0 qemu_ram_block_from_host <null> (qemu-system-aarch64+0x98fabb) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#1 qemu_ram_addr_from_host_nofail <null> (qemu-system-aarch64+0x98ff16) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#2 get_page_addr_code_hostp <null> (qemu-system-aarch64+0x4bbd0b) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#3 tb_htable_lookup <null> (qemu-system-aarch64+0x49f7bc) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#4 cpu_exec_loop <null> (qemu-system-aarch64+0x4a08a5) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#5 cpu_exec_setjmp <null> (qemu-system-aarch64+0x4a112b) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#6 cpu_exec <null> (qemu-system-aarch64+0x4a1b74) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#7 tcg_cpu_exec <null> (qemu-system-aarch64+0x4cb92b) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#8 mttcg_cpu_thread_fn <null> (qemu-system-aarch64+0x4cbe81) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#9 do_st2_mmu <null> (qemu-system-aarch64+0x4ba389) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#10 helper_stw_mmu <null> (qemu-system-aarch64+0x4bc571) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#11 <null> <null> (0x7f936faabdb2)
#12 cpu_exec_loop <null> (qemu-system-aarch64+0x4a04fc) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#13 cpu_exec_setjmp <null> (qemu-system-aarch64+0x4a112b) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#14 cpu_loop_exit_noexc <null> (qemu-system-aarch64+0x4a2242) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#15 cpu_io_recompile <null> (qemu-system-aarch64+0x4b0a9b) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#16 do_ld_mmio_beN <null> (qemu-system-aarch64+0x4b47c9) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#17 do_ld2_mmu <null> (qemu-system-aarch64+0x4b93aa) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#18 helper_lduw_mmu <null> (qemu-system-aarch64+0x4bc0a7) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
#19 <null> <null> (0x7f936faab758)
<snip>
So I guess we are trying to free the memory while still running?
--
Alex Bennée
Virtualisation Tech Lead @ Linaro
On 2026/05/19 21:45, Alex Bennée wrote:
> Alex Bennée <alex.bennee@linaro.org> writes:
>
>> Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> writes:
>>
>>> On 2026/05/19 4:35, Alex Bennée wrote:
>>>> Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> writes:
>>>>
>>>>> This fixes a deadlock I previously observed with the test in [1].
>>>>>
>>>>> However, I can no longer reproduce the issue reliably with that test, so
>>>>> I used Codex, a coding agent, to write a more reliable local test case,
>>>>> shown below. I applied to Codex for Open Source to get access. The test
>>>>> case is not intended for merge: current policy prohibits that, and it is
>>>>> probably not worth carrying anyway because race-condition tests are
>>>>> inherently fragile.
>>>> What sort of hit rate where you getting with the race? So far they
>>>> have
>>>> both been rock solid without the additional patches for me.
>>>
>>> I hit the deadlock in 8 out of 10 trials.
>>
>> It's taking a lot longer on my system (~ 1 in 100) but with these
>> patches I'm still seeing a hang, it just takes a lot longer to get
>> there.
>
> tsan shows:
>
> [INFO] mapping blob object resource
> [INFO] resource_map_blob response is CtrlHeader { hdr_type: Command(4358), flags: 0, fence_id: 0, ctx_id: 0, _padding: 0 }
> [INFO] unmapping blob object resource
> ==================
> WARNING: ThreadSanitizer: data race (pid=3564641)
> Write of size 8 at 0x55c8ce6d4250 by thread T1 (mutexes: write M0, write M1):
> #0 qemu_ram_free <null> (qemu-system-aarch64+0x98f863) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #1 memory_region_destructor_ram <null> (qemu-system-aarch64+0x977046) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #2 memory_region_finalize <null> (qemu-system-aarch64+0x9830e5) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #3 object_unref <null> (qemu-system-aarch64+0xfa741c) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #4 object_finalize_child_property <null> (qemu-system-aarch64+0xfa765f) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #5 object_unref <null> (qemu-system-aarch64+0xfa73d6) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #6 flatview_destroy <null> (qemu-system-aarch64+0x978e7d) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #7 call_rcu_thread <null> (qemu-system-aarch64+0x122e268) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #8 qemu_thread_start <null> (qemu-system-aarch64+0x121cc8d) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
>
> Previous atomic read of size 8 at 0x55c8ce6d4250 by thread T7:
> #0 qemu_ram_block_from_host <null> (qemu-system-aarch64+0x98fabb) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #1 qemu_ram_addr_from_host_nofail <null> (qemu-system-aarch64+0x98ff16) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #2 get_page_addr_code_hostp <null> (qemu-system-aarch64+0x4bbd0b) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #3 tb_htable_lookup <null> (qemu-system-aarch64+0x49f7bc) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #4 cpu_exec_loop <null> (qemu-system-aarch64+0x4a08a5) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #5 cpu_exec_setjmp <null> (qemu-system-aarch64+0x4a112b) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #6 cpu_exec <null> (qemu-system-aarch64+0x4a1b74) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #7 tcg_cpu_exec <null> (qemu-system-aarch64+0x4cb92b) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #8 mttcg_cpu_thread_fn <null> (qemu-system-aarch64+0x4cbe81) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #9 do_st2_mmu <null> (qemu-system-aarch64+0x4ba389) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #10 helper_stw_mmu <null> (qemu-system-aarch64+0x4bc571) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #11 <null> <null> (0x7f936faabdb2)
> #12 cpu_exec_loop <null> (qemu-system-aarch64+0x4a04fc) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #13 cpu_exec_setjmp <null> (qemu-system-aarch64+0x4a112b) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #14 cpu_loop_exit_noexc <null> (qemu-system-aarch64+0x4a2242) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #15 cpu_io_recompile <null> (qemu-system-aarch64+0x4b0a9b) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #16 do_ld_mmio_beN <null> (qemu-system-aarch64+0x4b47c9) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #17 do_ld2_mmu <null> (qemu-system-aarch64+0x4b93aa) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #18 helper_lduw_mmu <null> (qemu-system-aarch64+0x4bc0a7) (BuildId: 9e57c19eb7cc79d8195b5fb05324859b4db6fbbc)
> #19 <null> <null> (0x7f936faab758)
>
> <snip>
>
> So I guess we are trying to free the memory while still running?
Probably not. qemu_ram_free() is named "free", but the RAMBlock itself
is reclaimed only after an RCU grace period. So the vCPU may still
observe the old RAMBlock while walking the RAMBlock list/MRU cache, and
that is an expected part of the lifetime scheme.
I think TSan is more likely complaining about ram_list.mru_block. The
read side uses an atomic RCU load:
block = qatomic_rcu_read(&ram_list.mru_block);
but qemu_ram_free() clears it with a plain store:
ram_list.mru_block = NULL;
If we want to fix this TSan report, the stores to ram_list.mru_block
should be made atomic as well. In qemu_ram_free(), qatomic_set_mb()
would also provide the barrier needed before updating ram_list.version,
so the explicit smp_wmb() there could go away.
This looks distinct from the remaining hang/deadlock, though. For that,
could you collect the thread backtraces when QEMU is stuck? That should
show which threads are actually waiting on each other, instead of an
incidental TSan report from the RAMBlock MRU cache.
Commit a41e2d97f92b ("virtio-gpu: reset gfx resources in main thread")
made resource destruction run in the main thread, but left command and
fence queue cleanup in virtio_gpu_reset(). When reset is initiated from
a vCPU thread, virtio_gpu_reset() schedules reset_bh and then waits with
qemu_cond_wait_bql(), which releases the BQL while the BH is running.
That split leaves a window after reset_bh has destroyed resources and
before virtio_gpu_reset() drains cmdq/fenceq. Other virtio-gpu BHs can
run in that window, so commands may be observed on the wrong side of the
reset boundary:
1. vCPU thread A: Enter virtio_gpu_reset()
2. vCPU thread A: Schedule reset_bh
3. vCPU thread A: Wait in qemu_cond_wait_bql(&g->reset_cond)
4. vCPU thread A: Drop the BQL while waiting
5. vCPU thread B: Take the BQL
6. vCPU thread B: Queue a command
7. vCPU thread B: Drop the BQL
8. Main thread: Take the BQL
9. Main thread: Run virtio_gpu_reset_bh()
10. Main thread: Destroy resources
11. Main thread: Signal g->reset_cond
12. Main thread: Process the queued command
13. Main thread: Drop the BQL
14. vCPU thread B: Take the BQL
15. vCPU thread B: Queue another command
16. vCPU thread B: Drop the BQL
17. vCPU thread A: Take the BQL
18. vCPU thread A: Leave qemu_cond_wait_bql(&g->reset_cond)
19. vCPU thread A: Delete the second command from cmdq
The first command is processed as if it happened after reset, while the
second command is discarded as if it happened before reset.
Move cmdq/fenceq cleanup and virtio_gpu_base_reset() into reset_bh so
all virtio-gpu reset state is updated in the same main-thread callback.
This keeps command processing from interleaving with a partially
completed reset.
Signed-off-by: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
---
hw/display/virtio-gpu.c | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index b998ce8324d6..d514b548efd9 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1582,6 +1582,7 @@ static void virtio_gpu_reset_bh(void *opaque)
{
VirtIOGPU *g = VIRTIO_GPU(opaque);
VirtIOGPUClass *vgc = VIRTIO_GPU_GET_CLASS(g);
+ struct virtio_gpu_ctrl_command *cmd;
struct virtio_gpu_simple_resource *res, *tmp;
uint32_t resource_id;
Error *local_err = NULL;
@@ -1601,10 +1602,25 @@ static void virtio_gpu_reset_bh(void *opaque)
}
}
+ while (!QTAILQ_EMPTY(&g->cmdq)) {
+ cmd = QTAILQ_FIRST(&g->cmdq);
+ QTAILQ_REMOVE(&g->cmdq, cmd, next);
+ g_free(cmd);
+ }
+
+ while (!QTAILQ_EMPTY(&g->fenceq)) {
+ cmd = QTAILQ_FIRST(&g->fenceq);
+ QTAILQ_REMOVE(&g->fenceq, cmd, next);
+ g->inflight--;
+ g_free(cmd);
+ }
+
for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL);
}
+ virtio_gpu_base_reset(VIRTIO_GPU_BASE(g));
+
g->reset_finished = true;
qemu_cond_signal(&g->reset_cond);
}
@@ -1612,7 +1628,6 @@ static void virtio_gpu_reset_bh(void *opaque)
void virtio_gpu_reset(VirtIODevice *vdev)
{
VirtIOGPU *g = VIRTIO_GPU(vdev);
- struct virtio_gpu_ctrl_command *cmd;
if (qemu_in_vcpu_thread()) {
g->reset_finished = false;
@@ -1623,21 +1638,6 @@ void virtio_gpu_reset(VirtIODevice *vdev)
} else {
aio_bh_call(g->reset_bh);
}
-
- while (!QTAILQ_EMPTY(&g->cmdq)) {
- cmd = QTAILQ_FIRST(&g->cmdq);
- QTAILQ_REMOVE(&g->cmdq, cmd, next);
- g_free(cmd);
- }
-
- while (!QTAILQ_EMPTY(&g->fenceq)) {
- cmd = QTAILQ_FIRST(&g->fenceq);
- QTAILQ_REMOVE(&g->fenceq, cmd, next);
- g->inflight--;
- g_free(cmd);
- }
-
- virtio_gpu_base_reset(VIRTIO_GPU_BASE(vdev));
}
static void
--
2.53.0
On 24/4/26 16:06, Akihiko Odaki wrote:
> Commit a41e2d97f92b ("virtio-gpu: reset gfx resources in main thread")
> made resource destruction run in the main thread, but left command and
> fence queue cleanup in virtio_gpu_reset(). When reset is initiated from
> a vCPU thread, virtio_gpu_reset() schedules reset_bh and then waits with
> qemu_cond_wait_bql(), which releases the BQL while the BH is running.
>
> That split leaves a window after reset_bh has destroyed resources and
> before virtio_gpu_reset() drains cmdq/fenceq. Other virtio-gpu BHs can
> run in that window, so commands may be observed on the wrong side of the
> reset boundary:
>
> 1. vCPU thread A: Enter virtio_gpu_reset()
> 2. vCPU thread A: Schedule reset_bh
> 3. vCPU thread A: Wait in qemu_cond_wait_bql(&g->reset_cond)
> 4. vCPU thread A: Drop the BQL while waiting
> 5. vCPU thread B: Take the BQL
> 6. vCPU thread B: Queue a command
> 7. vCPU thread B: Drop the BQL
> 8. Main thread: Take the BQL
> 9. Main thread: Run virtio_gpu_reset_bh()
> 10. Main thread: Destroy resources
> 11. Main thread: Signal g->reset_cond
> 12. Main thread: Process the queued command
> 13. Main thread: Drop the BQL
> 14. vCPU thread B: Take the BQL
> 15. vCPU thread B: Queue another command
> 16. vCPU thread B: Drop the BQL
> 17. vCPU thread A: Take the BQL
> 18. vCPU thread A: Leave qemu_cond_wait_bql(&g->reset_cond)
> 19. vCPU thread A: Delete the second command from cmdq
>
> The first command is processed as if it happened after reset, while the
> second command is discarded as if it happened before reset.
>
> Move cmdq/fenceq cleanup and virtio_gpu_base_reset() into reset_bh so
> all virtio-gpu reset state is updated in the same main-thread callback.
> This keeps command processing from interleaving with a partially
> completed reset.
>
> Signed-off-by: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
> ---
> hw/display/virtio-gpu.c | 32 ++++++++++++++++----------------
> 1 file changed, 16 insertions(+), 16 deletions(-)
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
virtio-gpu waits for the main thread to destroy resources and replace
surfaces, but it occasionally results in deadlock, so remove the code
to wait.
In particular, when running a test case[1] the main thread may wait for
the vCPUs to pause during shut down while a vCPU may be concurrently
resetting virtio-gpu.
vCPU actually does not need to perform resource destruction and surface
replacement synchronously, but it only needs to ensure correct ordering
among virtio-gpu operations. virtio-gpu-gl already exploits this fact to
ensure that virglrenderer is reset on the main thread; instead of
synchronously resetting virglrenderer when the device is being reset,
it resets virglrenderer just before processing the first command after
the device reset arrives.
Take advantage of this fact by removing synchronization between the main
thread and the resetting vCPU thread. The ordering with the control and
cursor queues is enforced by running the reset operation before
processing their commands as virtio-gpu-gl does.
[1] https://lore.kernel.org/qemu-devel/20251014111234.3190346-9-alex.bennee@linaro.org/
Fixes: a41e2d97f92b ("virtio-gpu: reset gfx resources in main thread")
Signed-off-by: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
---
include/hw/virtio/virtio-gpu.h | 4 +---
hw/display/virtio-gpu.c | 28 +++++++++++++---------------
2 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index f69fc1946273..48fb54aa4605 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -195,9 +195,7 @@ struct VirtIOGPU {
QEMUBH *ctrl_bh;
QEMUBH *cursor_bh;
- QEMUBH *reset_bh;
- QemuCond reset_cond;
- bool reset_finished;
+ bool reset_pending;
QTAILQ_HEAD(, virtio_gpu_simple_resource) reslist;
QTAILQ_HEAD(, virtio_gpu_ctrl_command) cmdq;
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index d514b548efd9..4c053e9a2f75 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -40,7 +40,7 @@ virtio_gpu_find_check_resource(VirtIOGPU *g, uint32_t resource_id,
bool require_backing,
const char *caller, uint32_t *error);
-static void virtio_gpu_reset_bh(void *opaque);
+static void virtio_gpu_reset_bh(VirtIOGPU *g);
void virtio_gpu_update_cursor_data(VirtIOGPU *g,
struct virtio_gpu_scanout *s,
@@ -1138,6 +1138,7 @@ static void virtio_gpu_ctrl_bh(void *opaque)
VirtIOGPU *g = opaque;
VirtIOGPUClass *vgc = VIRTIO_GPU_GET_CLASS(g);
+ virtio_gpu_reset_bh(g);
vgc->handle_ctrl(VIRTIO_DEVICE(g), g->ctrl_vq);
}
@@ -1176,6 +1177,7 @@ static void virtio_gpu_handle_cursor(VirtIODevice *vdev, VirtQueue *vq)
static void virtio_gpu_cursor_bh(void *opaque)
{
VirtIOGPU *g = opaque;
+ virtio_gpu_reset_bh(g);
virtio_gpu_handle_cursor(&g->parent_obj.parent_obj, g->cursor_vq);
}
@@ -1560,8 +1562,6 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
g->cursor_vq = virtio_get_queue(vdev, 1);
g->ctrl_bh = virtio_bh_io_new_guarded(qdev, virtio_gpu_ctrl_bh, g);
g->cursor_bh = virtio_bh_io_new_guarded(qdev, virtio_gpu_cursor_bh, g);
- g->reset_bh = virtio_bh_io_new_guarded(qdev, virtio_gpu_reset_bh, g);
- qemu_cond_init(&g->reset_cond);
QTAILQ_INIT(&g->reslist);
QTAILQ_INIT(&g->cmdq);
QTAILQ_INIT(&g->fenceq);
@@ -1573,14 +1573,11 @@ static void virtio_gpu_device_unrealize(DeviceState *qdev)
g_clear_pointer(&g->ctrl_bh, qemu_bh_delete);
g_clear_pointer(&g->cursor_bh, qemu_bh_delete);
- g_clear_pointer(&g->reset_bh, qemu_bh_delete);
- qemu_cond_destroy(&g->reset_cond);
virtio_gpu_base_device_unrealize(qdev);
}
-static void virtio_gpu_reset_bh(void *opaque)
+static void virtio_gpu_reset_bh(VirtIOGPU *g)
{
- VirtIOGPU *g = VIRTIO_GPU(opaque);
VirtIOGPUClass *vgc = VIRTIO_GPU_GET_CLASS(g);
struct virtio_gpu_ctrl_command *cmd;
struct virtio_gpu_simple_resource *res, *tmp;
@@ -1588,6 +1585,10 @@ static void virtio_gpu_reset_bh(void *opaque)
Error *local_err = NULL;
int i = 0;
+ if (!g->reset_pending) {
+ return;
+ }
+
QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
resource_id = res->resource_id;
vgc->resource_destroy(g, res, &local_err);
@@ -1621,22 +1622,19 @@ static void virtio_gpu_reset_bh(void *opaque)
virtio_gpu_base_reset(VIRTIO_GPU_BASE(g));
- g->reset_finished = true;
- qemu_cond_signal(&g->reset_cond);
+ g->reset_pending = false;
}
void virtio_gpu_reset(VirtIODevice *vdev)
{
VirtIOGPU *g = VIRTIO_GPU(vdev);
+ g->reset_pending = true;
+
if (qemu_in_vcpu_thread()) {
- g->reset_finished = false;
- qemu_bh_schedule(g->reset_bh);
- while (!g->reset_finished) {
- qemu_cond_wait_bql(&g->reset_cond);
- }
+ qemu_bh_schedule(g->ctrl_bh);
} else {
- aio_bh_call(g->reset_bh);
+ virtio_gpu_reset_bh(g);
}
}
--
2.53.0
© 2016 - 2026 Red Hat, Inc.