accel/tcg/user-exec-stub.c | 4 - accel/tcg/user-exec.c | 55 ++++++ bsd-user/aarch64/target_arch_cpu.h | 6 +- bsd-user/arm/target_arch_cpu.h | 5 +- bsd-user/freebsd/os-syscall.c | 10 + bsd-user/i386/target_arch_cpu.h | 5 +- bsd-user/main.c | 8 +- bsd-user/x86_64/target_arch_cpu.h | 5 +- cpu-common.c | 179 ++++++++++++++++++ gdbstub/gdbstub.c | 17 +- gdbstub/internals.h | 4 +- gdbstub/syscalls.c | 20 +- gdbstub/system.c | 18 +- gdbstub/user.c | 28 ++- include/exec/cpu-common.h | 15 ++ include/exec/replay-core.h | 13 ++ include/hw/core/cpu.h | 1 + include/qemu/thread-posix.h | 8 + include/qemu/thread-win32.h | 8 + include/sysemu/cpus.h | 6 - include/sysemu/replay.h | 13 -- linux-user/aarch64/cpu_loop.c | 5 +- linux-user/alpha/cpu_loop.c | 5 +- linux-user/arm/cpu_loop.c | 5 +- linux-user/hexagon/cpu_loop.c | 5 +- linux-user/hppa/cpu_loop.c | 5 +- linux-user/i386/cpu_loop.c | 5 +- linux-user/loongarch64/cpu_loop.c | 5 +- linux-user/m68k/cpu_loop.c | 5 +- linux-user/main.c | 9 +- linux-user/microblaze/cpu_loop.c | 5 +- linux-user/mips/cpu_loop.c | 5 +- linux-user/openrisc/cpu_loop.c | 5 +- linux-user/ppc/cpu_loop.c | 5 +- linux-user/riscv/cpu_loop.c | 5 +- linux-user/s390x/cpu_loop.c | 5 +- linux-user/sh4/cpu_loop.c | 5 +- linux-user/sparc/cpu_loop.c | 5 +- linux-user/syscall.c | 12 ++ linux-user/xtensa/cpu_loop.c | 5 +- replay/stubs-system.c | 8 + stubs/meson.build | 8 + stubs/qemu-timer.c | 6 + stubs/replay-mutex.c | 10 + stubs/replay-tools.c | 8 - system/cpus.c | 172 +---------------- tests/tcg/multiarch/Makefile.target | 13 +- .../gdbstub/test-thread-breakpoint-stress.py | 28 +++ .../tcg/multiarch/thread-breakpoint-stress.c | 92 +++++++++ 49 files changed, 552 insertions(+), 327 deletions(-) create mode 100644 stubs/qemu-timer.c create mode 100644 stubs/replay-mutex.c create mode 100644 tests/tcg/multiarch/gdbstub/test-thread-breakpoint-stress.py create mode 100644 tests/tcg/multiarch/thread-breakpoint-stress.c
Hi,
On reporting a breakpoint in a non-non-stop (i.e., all-stop) mode, GDB remotes must stop
all threads. Currently qemu-user doesn't do that, breaking the
debugging session for at least two reasons: concurrent access to the
GDB socket, and an assertion within GDB [1].
This series fixes this by importing pause_all_vcpus() from qemu-system.
This in turn requires introducing BQL and a few stubs to qemu-user.
Best regards,
Ilya
[1] https://gitlab.com/qemu-project/qemu/-/issues/2465
Ilya Leoshkevich (18):
gdbstub: Make gdb_get_char() static
gdbstub: Move phy_memory_mode to GDBSystemState
gdbstub: Move gdb_syscall_mode to GDBSyscallState
gdbstub: Factor out gdb_try_stop()
accel/tcg: Factor out cpu_exec_user()
qemu-thread: Introduce QEMU_MUTEX_INITIALIZER
qemu-thread: Introduce QEMU_COND_INITIALIZER
replay: Add replay_mutex_{lock,unlock}() stubs for qemu-user
qemu-timer: Provide qemu_clock_enable() stub for qemu-user
cpu: Use BQL in qemu-user
accel/tcg: Unify user implementations of qemu_cpu_kick()
cpu: Track CPUs executing syscalls
cpu: Implement cpu_thread_is_idle() for qemu-user
cpu: Introduce cpu_is_paused()
cpu: Set current_cpu early in qemu-user
cpu: Allow pausing and resuming CPUs in qemu-user
gdbstub: Pause all CPUs before sending stop replies
tests/tcg: Stress test thread breakpoints
accel/tcg/user-exec-stub.c | 4 -
accel/tcg/user-exec.c | 55 ++++++
bsd-user/aarch64/target_arch_cpu.h | 6 +-
bsd-user/arm/target_arch_cpu.h | 5 +-
bsd-user/freebsd/os-syscall.c | 10 +
bsd-user/i386/target_arch_cpu.h | 5 +-
bsd-user/main.c | 8 +-
bsd-user/x86_64/target_arch_cpu.h | 5 +-
cpu-common.c | 179 ++++++++++++++++++
gdbstub/gdbstub.c | 17 +-
gdbstub/internals.h | 4 +-
gdbstub/syscalls.c | 20 +-
gdbstub/system.c | 18 +-
gdbstub/user.c | 28 ++-
include/exec/cpu-common.h | 15 ++
include/exec/replay-core.h | 13 ++
include/hw/core/cpu.h | 1 +
include/qemu/thread-posix.h | 8 +
include/qemu/thread-win32.h | 8 +
include/sysemu/cpus.h | 6 -
include/sysemu/replay.h | 13 --
linux-user/aarch64/cpu_loop.c | 5 +-
linux-user/alpha/cpu_loop.c | 5 +-
linux-user/arm/cpu_loop.c | 5 +-
linux-user/hexagon/cpu_loop.c | 5 +-
linux-user/hppa/cpu_loop.c | 5 +-
linux-user/i386/cpu_loop.c | 5 +-
linux-user/loongarch64/cpu_loop.c | 5 +-
linux-user/m68k/cpu_loop.c | 5 +-
linux-user/main.c | 9 +-
linux-user/microblaze/cpu_loop.c | 5 +-
linux-user/mips/cpu_loop.c | 5 +-
linux-user/openrisc/cpu_loop.c | 5 +-
linux-user/ppc/cpu_loop.c | 5 +-
linux-user/riscv/cpu_loop.c | 5 +-
linux-user/s390x/cpu_loop.c | 5 +-
linux-user/sh4/cpu_loop.c | 5 +-
linux-user/sparc/cpu_loop.c | 5 +-
linux-user/syscall.c | 12 ++
linux-user/xtensa/cpu_loop.c | 5 +-
replay/stubs-system.c | 8 +
stubs/meson.build | 8 +
stubs/qemu-timer.c | 6 +
stubs/replay-mutex.c | 10 +
stubs/replay-tools.c | 8 -
system/cpus.c | 172 +----------------
tests/tcg/multiarch/Makefile.target | 13 +-
.../gdbstub/test-thread-breakpoint-stress.py | 28 +++
.../tcg/multiarch/thread-breakpoint-stress.c | 92 +++++++++
49 files changed, 552 insertions(+), 327 deletions(-)
create mode 100644 stubs/qemu-timer.c
create mode 100644 stubs/replay-mutex.c
create mode 100644 tests/tcg/multiarch/gdbstub/test-thread-breakpoint-stress.py
create mode 100644 tests/tcg/multiarch/thread-breakpoint-stress.c
--
2.46.0
Ilya Leoshkevich <iii@linux.ibm.com> writes:
> Hi,
>
> On reporting a breakpoint in a non-non-stop mode, GDB remotes must stop
> all threads. Currently qemu-user doesn't do that, breaking the
> debugging session for at least two reasons: concurrent access to the
> GDB socket, and an assertion within GDB [1].
>
> This series fixes this by importing pause_all_vcpus() from qemu-system.
> This in turn requires introducing BQL and a few stubs to qemu-user.
Is there a conclusion to this design choice? I'd like to avoid bringing
in a bunch of system-mode infrastructure if the existing exclusive code
would work. For that I'll defer to the linux-user maintainer or Richard
who knows the code better than I do.
I could certainly harvest the early clean-up patches to keep the delta
low while the details are worked out. Is there going to be a v2?
>
> Best regards,
> Ilya
>
> [1] https://gitlab.com/qemu-project/qemu/-/issues/2465
>
> Ilya Leoshkevich (18):
> gdbstub: Make gdb_get_char() static
> gdbstub: Move phy_memory_mode to GDBSystemState
> gdbstub: Move gdb_syscall_mode to GDBSyscallState
> gdbstub: Factor out gdb_try_stop()
> accel/tcg: Factor out cpu_exec_user()
> qemu-thread: Introduce QEMU_MUTEX_INITIALIZER
> qemu-thread: Introduce QEMU_COND_INITIALIZER
> replay: Add replay_mutex_{lock,unlock}() stubs for qemu-user
> qemu-timer: Provide qemu_clock_enable() stub for qemu-user
> cpu: Use BQL in qemu-user
> accel/tcg: Unify user implementations of qemu_cpu_kick()
> cpu: Track CPUs executing syscalls
> cpu: Implement cpu_thread_is_idle() for qemu-user
> cpu: Introduce cpu_is_paused()
> cpu: Set current_cpu early in qemu-user
> cpu: Allow pausing and resuming CPUs in qemu-user
> gdbstub: Pause all CPUs before sending stop replies
> tests/tcg: Stress test thread breakpoints
>
> accel/tcg/user-exec-stub.c | 4 -
> accel/tcg/user-exec.c | 55 ++++++
> bsd-user/aarch64/target_arch_cpu.h | 6 +-
> bsd-user/arm/target_arch_cpu.h | 5 +-
> bsd-user/freebsd/os-syscall.c | 10 +
> bsd-user/i386/target_arch_cpu.h | 5 +-
> bsd-user/main.c | 8 +-
> bsd-user/x86_64/target_arch_cpu.h | 5 +-
> cpu-common.c | 179 ++++++++++++++++++
> gdbstub/gdbstub.c | 17 +-
> gdbstub/internals.h | 4 +-
> gdbstub/syscalls.c | 20 +-
> gdbstub/system.c | 18 +-
> gdbstub/user.c | 28 ++-
> include/exec/cpu-common.h | 15 ++
> include/exec/replay-core.h | 13 ++
> include/hw/core/cpu.h | 1 +
> include/qemu/thread-posix.h | 8 +
> include/qemu/thread-win32.h | 8 +
> include/sysemu/cpus.h | 6 -
> include/sysemu/replay.h | 13 --
> linux-user/aarch64/cpu_loop.c | 5 +-
> linux-user/alpha/cpu_loop.c | 5 +-
> linux-user/arm/cpu_loop.c | 5 +-
> linux-user/hexagon/cpu_loop.c | 5 +-
> linux-user/hppa/cpu_loop.c | 5 +-
> linux-user/i386/cpu_loop.c | 5 +-
> linux-user/loongarch64/cpu_loop.c | 5 +-
> linux-user/m68k/cpu_loop.c | 5 +-
> linux-user/main.c | 9 +-
> linux-user/microblaze/cpu_loop.c | 5 +-
> linux-user/mips/cpu_loop.c | 5 +-
> linux-user/openrisc/cpu_loop.c | 5 +-
> linux-user/ppc/cpu_loop.c | 5 +-
> linux-user/riscv/cpu_loop.c | 5 +-
> linux-user/s390x/cpu_loop.c | 5 +-
> linux-user/sh4/cpu_loop.c | 5 +-
> linux-user/sparc/cpu_loop.c | 5 +-
> linux-user/syscall.c | 12 ++
> linux-user/xtensa/cpu_loop.c | 5 +-
> replay/stubs-system.c | 8 +
> stubs/meson.build | 8 +
> stubs/qemu-timer.c | 6 +
> stubs/replay-mutex.c | 10 +
> stubs/replay-tools.c | 8 -
> system/cpus.c | 172 +----------------
> tests/tcg/multiarch/Makefile.target | 13 +-
> .../gdbstub/test-thread-breakpoint-stress.py | 28 +++
> .../tcg/multiarch/thread-breakpoint-stress.c | 92 +++++++++
> 49 files changed, 552 insertions(+), 327 deletions(-)
> create mode 100644 stubs/qemu-timer.c
> create mode 100644 stubs/replay-mutex.c
> create mode 100644 tests/tcg/multiarch/gdbstub/test-thread-breakpoint-stress.py
> create mode 100644 tests/tcg/multiarch/thread-breakpoint-stress.c
--
Alex Bennée
Virtualisation Tech Lead @ Linaro
On Wed, 2025-01-08 at 15:56 +0000, Alex Bennée wrote:
> Ilya Leoshkevich <iii@linux.ibm.com> writes:
>
> > Hi,
> >
> > On reporting a breakpoint in a non-non-stop mode, GDB remotes must
> > stop
> > all threads. Currently qemu-user doesn't do that, breaking the
> > debugging session for at least two reasons: concurrent access to
> > the
> > GDB socket, and an assertion within GDB [1].
> >
> > This series fixes this by importing pause_all_vcpus() from qemu-
> > system.
> > This in turn requires introducing BQL and a few stubs to qemu-user.
>
> Is there a conclusion to this design choice? I'd like to avoid
> bringing
> in a bunch of system-mode infrastructure if the existing exclusive
> code
> would work. For that I'll defer to the linux-user maintainer or
> Richard
> who knows the code better than I do.
I wanted to re-implement parking CPUs using a reserved host signal.
I've submitted the foundations for this in [1], and I'm currently
waiting for the review.
[1]
https://lore.kernel.org/qemu-devel/20241216123412.77450-1-iii@linux.ibm.com/
>
> I could certainly harvest the early clean-up patches to keep the
> delta
> low while the details are worked out. Is there going to be a v2?
It would be great if clean-up patches could be taken separately, since
IMHO they make sense on their own. I plan to send a v2 after [1] is
integrated.
> > Best regards,
> > Ilya
> >
> > [1] https://gitlab.com/qemu-project/qemu/-/issues/2465
> >
> > Ilya Leoshkevich (18):
> > gdbstub: Make gdb_get_char() static
> > gdbstub: Move phy_memory_mode to GDBSystemState
> > gdbstub: Move gdb_syscall_mode to GDBSyscallState
> > gdbstub: Factor out gdb_try_stop()
> > accel/tcg: Factor out cpu_exec_user()
> > qemu-thread: Introduce QEMU_MUTEX_INITIALIZER
> > qemu-thread: Introduce QEMU_COND_INITIALIZER
> > replay: Add replay_mutex_{lock,unlock}() stubs for qemu-user
> > qemu-timer: Provide qemu_clock_enable() stub for qemu-user
> > cpu: Use BQL in qemu-user
> > accel/tcg: Unify user implementations of qemu_cpu_kick()
> > cpu: Track CPUs executing syscalls
> > cpu: Implement cpu_thread_is_idle() for qemu-user
> > cpu: Introduce cpu_is_paused()
> > cpu: Set current_cpu early in qemu-user
> > cpu: Allow pausing and resuming CPUs in qemu-user
> > gdbstub: Pause all CPUs before sending stop replies
> > tests/tcg: Stress test thread breakpoints
> >
> > accel/tcg/user-exec-stub.c | 4 -
> > accel/tcg/user-exec.c | 55 ++++++
> > bsd-user/aarch64/target_arch_cpu.h | 6 +-
> > bsd-user/arm/target_arch_cpu.h | 5 +-
> > bsd-user/freebsd/os-syscall.c | 10 +
> > bsd-user/i386/target_arch_cpu.h | 5 +-
> > bsd-user/main.c | 8 +-
> > bsd-user/x86_64/target_arch_cpu.h | 5 +-
> > cpu-common.c | 179
> > ++++++++++++++++++
> > gdbstub/gdbstub.c | 17 +-
> > gdbstub/internals.h | 4 +-
> > gdbstub/syscalls.c | 20 +-
> > gdbstub/system.c | 18 +-
> > gdbstub/user.c | 28 ++-
> > include/exec/cpu-common.h | 15 ++
> > include/exec/replay-core.h | 13 ++
> > include/hw/core/cpu.h | 1 +
> > include/qemu/thread-posix.h | 8 +
> > include/qemu/thread-win32.h | 8 +
> > include/sysemu/cpus.h | 6 -
> > include/sysemu/replay.h | 13 --
> > linux-user/aarch64/cpu_loop.c | 5 +-
> > linux-user/alpha/cpu_loop.c | 5 +-
> > linux-user/arm/cpu_loop.c | 5 +-
> > linux-user/hexagon/cpu_loop.c | 5 +-
> > linux-user/hppa/cpu_loop.c | 5 +-
> > linux-user/i386/cpu_loop.c | 5 +-
> > linux-user/loongarch64/cpu_loop.c | 5 +-
> > linux-user/m68k/cpu_loop.c | 5 +-
> > linux-user/main.c | 9 +-
> > linux-user/microblaze/cpu_loop.c | 5 +-
> > linux-user/mips/cpu_loop.c | 5 +-
> > linux-user/openrisc/cpu_loop.c | 5 +-
> > linux-user/ppc/cpu_loop.c | 5 +-
> > linux-user/riscv/cpu_loop.c | 5 +-
> > linux-user/s390x/cpu_loop.c | 5 +-
> > linux-user/sh4/cpu_loop.c | 5 +-
> > linux-user/sparc/cpu_loop.c | 5 +-
> > linux-user/syscall.c | 12 ++
> > linux-user/xtensa/cpu_loop.c | 5 +-
> > replay/stubs-system.c | 8 +
> > stubs/meson.build | 8 +
> > stubs/qemu-timer.c | 6 +
> > stubs/replay-mutex.c | 10 +
> > stubs/replay-tools.c | 8 -
> > system/cpus.c | 172 +-------------
> > ---
> > tests/tcg/multiarch/Makefile.target | 13 +-
> > .../gdbstub/test-thread-breakpoint-stress.py | 28 +++
> > .../tcg/multiarch/thread-breakpoint-stress.c | 92 +++++++++
> > 49 files changed, 552 insertions(+), 327 deletions(-)
> > create mode 100644 stubs/qemu-timer.c
> > create mode 100644 stubs/replay-mutex.c
> > create mode 100644 tests/tcg/multiarch/gdbstub/test-thread-
> > breakpoint-stress.py
> > create mode 100644 tests/tcg/multiarch/thread-breakpoint-stress.c
>
On 9/23/24 18:12, Ilya Leoshkevich wrote:
> Hi,
>
> On reporting a breakpoint in a non-non-stop mode, GDB remotes must stop
> all threads. Currently qemu-user doesn't do that, breaking the
> debugging session for at least two reasons: concurrent access to the
> GDB socket, and an assertion within GDB [1].
>
> This series fixes this by importing pause_all_vcpus() from qemu-system.
> This in turn requires introducing BQL and a few stubs to qemu-user.
I would have expected you to reuse (some portion of) start_exclusive, which is already
part of qemu-user. Is there a reason you chose a solution which requires...
> replay: Add replay_mutex_{lock,unlock}() stubs for qemu-user
> qemu-timer: Provide qemu_clock_enable() stub for qemu-user
> cpu: Use BQL in qemu-user
all sorts of other infrastructure?
r~
On Tue, 2024-09-24 at 13:46 +0200, Richard Henderson wrote:
> On 9/23/24 18:12, Ilya Leoshkevich wrote:
> > Hi,
> >
> > On reporting a breakpoint in a non-non-stop mode, GDB remotes must
> > stop
> > all threads. Currently qemu-user doesn't do that, breaking the
> > debugging session for at least two reasons: concurrent access to
> > the
> > GDB socket, and an assertion within GDB [1].
> >
> > This series fixes this by importing pause_all_vcpus() from qemu-
> > system.
> > This in turn requires introducing BQL and a few stubs to qemu-user.
>
> I would have expected you to reuse (some portion of) start_exclusive,
> which is already
> part of qemu-user. Is there a reason you chose a solution which
> requires...
>
> > replay: Add replay_mutex_{lock,unlock}() stubs for qemu-user
> > qemu-timer: Provide qemu_clock_enable() stub for qemu-user
> > cpu: Use BQL in qemu-user
>
> all sorts of other infrastructure?
>
>
> r~
I don't think start_exclusive() would protect the gdb socket from
concurrent accesses (e.g., if two threads are simultaneously stopped).
I have a patch [1] that introduces a big gdbstub lock for that, but it
looks more complex than just extending BQL to qemu-user. Also, the
BQL-based pause/resume code already works for the system mode and is
well tested.
[1]
https://gitlab.com/iii-i/qemu/-/commit/0944716218820f8bdfdcf80acc6c39a48b91670c
On 9/25/24 00:43, Ilya Leoshkevich wrote:
> On Tue, 2024-09-24 at 13:46 +0200, Richard Henderson wrote:
>> On 9/23/24 18:12, Ilya Leoshkevich wrote:
>>> Hi,
>>>
>>> On reporting a breakpoint in a non-non-stop mode, GDB remotes must
>>> stop
>>> all threads. Currently qemu-user doesn't do that, breaking the
>>> debugging session for at least two reasons: concurrent access to
>>> the
>>> GDB socket, and an assertion within GDB [1].
>>>
>>> This series fixes this by importing pause_all_vcpus() from qemu-
>>> system.
>>> This in turn requires introducing BQL and a few stubs to qemu-user.
>>
>> I would have expected you to reuse (some portion of) start_exclusive,
>> which is already
>> part of qemu-user. Is there a reason you chose a solution which
>> requires...
>>
>>> replay: Add replay_mutex_{lock,unlock}() stubs for qemu-user
>>> qemu-timer: Provide qemu_clock_enable() stub for qemu-user
>>> cpu: Use BQL in qemu-user
>>
>> all sorts of other infrastructure?
>>
>>
>> r~
>
> I don't think start_exclusive() would protect the gdb socket from
> concurrent accesses (e.g., if two threads are simultaneously stopped).
Of course it would, otherwise "exclusive" has no meaning.
All other cpus are blocked in exclusive_idle().
Importantly, no cpus are blocked in syscalls, where the kernel can modify memory behind
gdbstub's back (e.g. read). I think considering "in_syscall" to be "paused" is a mistake.
r~
On Sat, 2024-10-05 at 12:51 -0700, Richard Henderson wrote:
> On 9/25/24 00:43, Ilya Leoshkevich wrote:
> > On Tue, 2024-09-24 at 13:46 +0200, Richard Henderson wrote:
> > > On 9/23/24 18:12, Ilya Leoshkevich wrote:
> > > > Hi,
> > > >
> > > > On reporting a breakpoint in a non-non-stop mode, GDB remotes
> > > > must
> > > > stop
> > > > all threads. Currently qemu-user doesn't do that, breaking the
> > > > debugging session for at least two reasons: concurrent access
> > > > to
> > > > the
> > > > GDB socket, and an assertion within GDB [1].
> > > >
> > > > This series fixes this by importing pause_all_vcpus() from
> > > > qemu-
> > > > system.
> > > > This in turn requires introducing BQL and a few stubs to qemu-
> > > > user.
> > >
> > > I would have expected you to reuse (some portion of)
> > > start_exclusive,
> > > which is already
> > > part of qemu-user. Is there a reason you chose a solution which
> > > requires...
> > >
> > > > replay: Add replay_mutex_{lock,unlock}() stubs for qemu-
> > > > user
> > > > qemu-timer: Provide qemu_clock_enable() stub for qemu-user
> > > > cpu: Use BQL in qemu-user
> > >
> > > all sorts of other infrastructure?
> > >
> > >
> > > r~
> >
> > I don't think start_exclusive() would protect the gdb socket from
> > concurrent accesses (e.g., if two threads are simultaneously
> > stopped).
>
> Of course it would, otherwise "exclusive" has no meaning.
> All other cpus are blocked in exclusive_idle().
>
> Importantly, no cpus are blocked in syscalls, where the kernel can
> modify memory behind
> gdbstub's back (e.g. read). I think considering "in_syscall" to be
> "paused" a mistake.
>
>
> r~
How can we handle the long-running syscalls?
Just waiting sounds unsatisfying.
Sending a reserved host signal may alter the guest's behaviour if a
syscall like pause() is interrupted.
What do you think about SIGSTOP-ping the "in_syscall" threads?
A quick experiment shows that it should be completely invisible to the
guest - the following program continues to run after SIGSTOP/SIGCONT:
#include <sys/syscall.h>
#include <unistd.h>
int main(void) { syscall(__NR_pause); };
On Sat, 2024-10-05 at 22:26 +0200, Ilya Leoshkevich wrote:
> On Sat, 2024-10-05 at 12:51 -0700, Richard Henderson wrote:
> > On 9/25/24 00:43, Ilya Leoshkevich wrote:
> > > On Tue, 2024-09-24 at 13:46 +0200, Richard Henderson wrote:
> > > > On 9/23/24 18:12, Ilya Leoshkevich wrote:
> > > > > Hi,
> > > > >
> > > > > On reporting a breakpoint in a non-non-stop mode, GDB remotes
> > > > > must
> > > > > stop
> > > > > all threads. Currently qemu-user doesn't do that, breaking
> > > > > the
> > > > > debugging session for at least two reasons: concurrent access
> > > > > to
> > > > > the
> > > > > GDB socket, and an assertion within GDB [1].
> > > > >
> > > > > This series fixes this by importing pause_all_vcpus() from
> > > > > qemu-
> > > > > system.
> > > > > This in turn requires introducing BQL and a few stubs to
> > > > > qemu-
> > > > > user.
> > > >
> > > > I would have expected you to reuse (some portion of)
> > > > start_exclusive,
> > > > which is already
> > > > part of qemu-user. Is there a reason you chose a solution
> > > > which
> > > > requires...
> > > >
> > > > > replay: Add replay_mutex_{lock,unlock}() stubs for qemu-
> > > > > user
> > > > > qemu-timer: Provide qemu_clock_enable() stub for qemu-
> > > > > user
> > > > > cpu: Use BQL in qemu-user
> > > >
> > > > all sorts of other infrastructure?
> > > >
> > > >
> > > > r~
> > >
> > > I don't think start_exclusive() would protect the gdb socket from
> > > concurrent accesses (e.g., if two threads are simultaneously
> > > stopped).
> >
> > Of course it would, otherwise "exclusive" has no meaning.
> > All other cpus are blocked in exclusive_idle().
> >
> > Importantly, no cpus are blocked in syscalls, where the kernel can
> > modify memory behind
> > gdbstub's back (e.g. read). I think considering "in_syscall" to be
> > "paused" a mistake.
> >
> >
> > r~
>
> How can we handle the long-running syscalls?
> Just waiting sounds unsatisfying.
> Sending a reserved host signal may alter the guest's behaviour if a
> syscall like pause() is interrupted.
> What do you think about SIGSTOP-ping the "in_syscall" threads?
> A quick experiment shows that it should be completely invisible to
> the
> guest - the following program continues to run after SIGSTOP/SIGCONT:
>
> #include <sys/syscall.h>
> #include <unistd.h>
> int main(void) { syscall(__NR_pause); };
Hmm, no, that won't work: SIGSTOP would stop all threads.
So I wonder if reserving a host signal for interrupting "in_syscall"
threads would be an acceptable tradeoff?
On 10/5/24 13:35, Ilya Leoshkevich wrote:
>> How can we handle the long-running syscalls?
>> Just waiting sounds unsatisfying.
>> Sending a reserved host signal may alter the guest's behaviour if a
>> syscall like pause() is interrupted.
>> What do you think about SIGSTOP-ping the "in_syscall" threads?
>> A quick experiment shows that it should be completely invisible to
>> the
>> guest - the following program continues to run after SIGSTOP/SIGCONT:
>>
>> #include <sys/syscall.h>
>> #include <unistd.h>
>> int main(void) { syscall(__NR_pause); };
>
> Hmm, no, that won't work: SIGSTOP would stop all threads.
>
> So I wonder if reserving a host signal for interrupting "in_syscall"
> threads would be an acceptable tradeoff?
Could work, yes. We already steal SIGRTMIN for guest abort (to distinguish from host
abort), and remap guest __SIGRTMIN to host SIGRTMIN+1. Grabbing SIGRTMIN+1 should work
ok, modulo the existing problem of presenting the guest with an incomplete set of signals.
I've wondered from time to time about multiplexing signals in this space, but I think that
runs afoul of having a consistent mapping for interprocess signaling.
r~
On Tue, 2024-10-08 at 11:17 -0700, Richard Henderson wrote:
> On 10/5/24 13:35, Ilya Leoshkevich wrote:
> > > How can we handle the long-running syscalls?
> > > Just waiting sounds unsatisfying.
> > > Sending a reserved host signal may alter the guest's behaviour if
> > > a
> > > syscall like pause() is interrupted.
> > > What do you think about SIGSTOP-ping the "in_syscall" threads?
> > > A quick experiment shows that it should be completely invisible
> > > to
> > > the
> > > guest - the following program continues to run after
> > > SIGSTOP/SIGCONT:
> > >
> > > #include <sys/syscall.h>
> > > #include <unistd.h>
> > > int main(void) { syscall(__NR_pause); };
> >
> > Hmm, no, that won't work: SIGSTOP would stop all threads.
> >
> > So I wonder if reserving a host signal for interrupting
> > "in_syscall"
> > threads would be an acceptable tradeoff?
>
> Could work, yes. We already steal SIGRTMIN for guest abort (to
> distinguish from host
> abort), and remap guest __SIGRTMIN to host SIGRTMIN+1. Grabbing
> SIGRTMIN+1 should work
> ok, modulo the existing problem of presenting the guest with an
> incomplete set of signals.
>
> I've wondered from time to time about multiplexing signals in this
> space, but I think that
> runs afoul of having a consistent mapping for interprocess signaling.
>
>
> r~
I tried to think through how this would work in conjunction with
start_exclusive(), and there is one problem I don't see a good solution
for. Maybe you will have an idea.
The way I'm thinking of implementing this is as follows:
- Reserve the host's SIGRTMIN+1 and tweak host_signal_handler() to do
nothing for this signal.
- In gdb_try_stop(), call start_exclusive(). After it returns, some
threads will be parked in exclusive_idle(). Some other threads will
be on their way to getting parked, and this needs to actually happen
before gdb_try_stop() can proceed. For example, the ones that are
executing handle_pending_signal() may change memory and CPU state.
IIUC start_exclusive() will not wait for them, because they are not
"running". I think a global counter protected by qemu_cpu_list_lock
and paired with a new condition variable should be enough for this.
- Threads executing long-running syscalls will need to be interrupted
by SIGRTMIN+1. These syscalls will return -EINTR and will need
to be manually restarted so as not to disturb poorly written guests.
This needs to happen only if there are no pending guest signals.
- Here is a minor problem: how to identify threads which need to be
signalled? in_syscall may not be enough. But maybe signalling all
threads won't hurt too much. The parked ones won't notice anyway.
- But here is the major problem: what if we signal a thread just before
it starts executing a long-running syscall? Such thread will be stuck
and we'll need to signal it again. But how to determine that this
needs to be done?
An obvious solution is to signal all threads in a loop with a 0.1s
delay until the counter reaches n_threads. But it's quite ugly.
Ideally SIGRTMIN+1 should be blocked most of the time. Then we should
identify all places where long-running syscalls may be invoked and
unblock SIGRTMIN+1 atomically with executing them. But I'm not aware
of such mechanism (I have an extremely vague recollection that
someone managed to abuse rseq for this, but we shouldn't be relying
on rseq being available anyway).
On Mon, 2024-09-23 at 18:12 +0200, Ilya Leoshkevich wrote:
> Hi,
>
> On reporting a breakpoint in a non-non-stop mode, GDB remotes must
> stop
> all threads. Currently qemu-user doesn't do that, breaking the
> debugging session for at least two reasons: concurrent access to the
> GDB socket, and an assertion within GDB [1].
>
> This series fixes this by importing pause_all_vcpus() from qemu-
> system.
> This in turn requires introducing BQL and a few stubs to qemu-user.
>
> Best regards,
> Ilya
>
> [1] https://gitlab.com/qemu-project/qemu/-/issues/2465
>
> Ilya Leoshkevich (18):
> gdbstub: Make gdb_get_char() static
> gdbstub: Move phy_memory_mode to GDBSystemState
> gdbstub: Move gdb_syscall_mode to GDBSyscallState
> gdbstub: Factor out gdb_try_stop()
> accel/tcg: Factor out cpu_exec_user()
> qemu-thread: Introduce QEMU_MUTEX_INITIALIZER
> qemu-thread: Introduce QEMU_COND_INITIALIZER
> replay: Add replay_mutex_{lock,unlock}() stubs for qemu-user
> qemu-timer: Provide qemu_clock_enable() stub for qemu-user
> cpu: Use BQL in qemu-user
> accel/tcg: Unify user implementations of qemu_cpu_kick()
> cpu: Track CPUs executing syscalls
> cpu: Implement cpu_thread_is_idle() for qemu-user
> cpu: Introduce cpu_is_paused()
> cpu: Set current_cpu early in qemu-user
> cpu: Allow pausing and resuming CPUs in qemu-user
> gdbstub: Pause all CPUs before sending stop replies
> tests/tcg: Stress test thread breakpoints
>
> accel/tcg/user-exec-stub.c | 4 -
> accel/tcg/user-exec.c | 55 ++++++
> bsd-user/aarch64/target_arch_cpu.h | 6 +-
> bsd-user/arm/target_arch_cpu.h | 5 +-
> bsd-user/freebsd/os-syscall.c | 10 +
> bsd-user/i386/target_arch_cpu.h | 5 +-
> bsd-user/main.c | 8 +-
> bsd-user/x86_64/target_arch_cpu.h | 5 +-
> cpu-common.c | 179
> ++++++++++++++++++
> gdbstub/gdbstub.c | 17 +-
> gdbstub/internals.h | 4 +-
> gdbstub/syscalls.c | 20 +-
> gdbstub/system.c | 18 +-
> gdbstub/user.c | 28 ++-
> include/exec/cpu-common.h | 15 ++
> include/exec/replay-core.h | 13 ++
> include/hw/core/cpu.h | 1 +
> include/qemu/thread-posix.h | 8 +
> include/qemu/thread-win32.h | 8 +
> include/sysemu/cpus.h | 6 -
> include/sysemu/replay.h | 13 --
> linux-user/aarch64/cpu_loop.c | 5 +-
> linux-user/alpha/cpu_loop.c | 5 +-
> linux-user/arm/cpu_loop.c | 5 +-
> linux-user/hexagon/cpu_loop.c | 5 +-
> linux-user/hppa/cpu_loop.c | 5 +-
> linux-user/i386/cpu_loop.c | 5 +-
> linux-user/loongarch64/cpu_loop.c | 5 +-
> linux-user/m68k/cpu_loop.c | 5 +-
> linux-user/main.c | 9 +-
> linux-user/microblaze/cpu_loop.c | 5 +-
> linux-user/mips/cpu_loop.c | 5 +-
> linux-user/openrisc/cpu_loop.c | 5 +-
> linux-user/ppc/cpu_loop.c | 5 +-
> linux-user/riscv/cpu_loop.c | 5 +-
> linux-user/s390x/cpu_loop.c | 5 +-
> linux-user/sh4/cpu_loop.c | 5 +-
> linux-user/sparc/cpu_loop.c | 5 +-
> linux-user/syscall.c | 12 ++
> linux-user/xtensa/cpu_loop.c | 5 +-
> replay/stubs-system.c | 8 +
> stubs/meson.build | 8 +
> stubs/qemu-timer.c | 6 +
> stubs/replay-mutex.c | 10 +
> stubs/replay-tools.c | 8 -
> system/cpus.c | 172 +---------------
> -
> tests/tcg/multiarch/Makefile.target | 13 +-
> .../gdbstub/test-thread-breakpoint-stress.py | 28 +++
> .../tcg/multiarch/thread-breakpoint-stress.c | 92 +++++++++
> 49 files changed, 552 insertions(+), 327 deletions(-)
> create mode 100644 stubs/qemu-timer.c
> create mode 100644 stubs/replay-mutex.c
> create mode 100644 tests/tcg/multiarch/gdbstub/test-thread-
> breakpoint-stress.py
> create mode 100644 tests/tcg/multiarch/thread-breakpoint-stress.c
Correction: the subject should have "qemu-user" instead of "qemu-cpu".
© 2016 - 2026 Red Hat, Inc.