[v4] tests/functional: Adapt reverse_debugging to run w/o Avocado

[PATCH v4 8/9] tests/functional: Adapt reverse_debugging to run w/o Avocado

Posted by Gustavo Romero 2 days, 10 hours ago

This commit removes Avocado as a dependency for running the
reverse_debugging test.

The main benefit, beyond eliminating an extra dependency, is that there
is no longer any need to handle GDB packets manually. This removes the
need for ad-hoc functions dealing with endianness and arch-specific
register numbers, making the test easier to read. The timeout variable
is also removed, since Meson now manages timeouts automatically.

reverse_debugging now uses the pygdbmi module to interact with GDB, if
it's available in the test environment, otherwise the test is skipped.
GDB is detect via the QEMU_TEST_GDB env. variable.

This commit also significantly improves the output for the test and
now prints all the GDB commands used in sequence. It also adds
some clarifications to existing comments, for example, clarifying that
once the replay-break is reached, a SIGINT is captured in GDB.

reverse_debugging is kept "skipped" for aarch64, ppc64, and x86_64, so
won't run unless QEMU_TEST_FLAKY_TESTS=1 is set in the test environment,
before running 'make check-functional' or 'meson test [...]'.

Signed-off-by: Gustavo Romero <gustavo.romero@linaro.org>
---
 tests/functional/reverse_debugging.py | 127 +++++++++++++-------------
 1 file changed, 61 insertions(+), 66 deletions(-)

diff --git a/tests/functional/reverse_debugging.py b/tests/functional/reverse_debugging.py
index 7fd8c7607f..4e4200dd48 100644
--- a/tests/functional/reverse_debugging.py
+++ b/tests/functional/reverse_debugging.py
@@ -1,19 +1,23 @@
-# Reverse debugging test
-#
 # SPDX-License-Identifier: GPL-2.0-or-later
 #
+# Reverse debugging test
+#
 # Copyright (c) 2020 ISP RAS
+# Copyright (c) 2025 Linaro Limited
 #
 # Author:
 #  Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
+#  Gustavo Romero <gustavo.romero@linaro.org> (Run without Avocado)
 #
 # This work is licensed under the terms of the GNU GPL, version 2 or
 # later.  See the COPYING file in the top-level directory.
-import os
+
 import logging
+import os
+from pygdbmi.constants import GdbTimeoutError
 from subprocess import check_output
 
-from qemu_test import LinuxKernelTest, get_qemu_img
+from qemu_test import LinuxKernelTest, get_qemu_img, GDB, skipIfMissingEnv
 from qemu_test.ports import Ports
 
 
@@ -29,9 +33,7 @@ class ReverseDebugging(LinuxKernelTest):
     that the execution is stopped at the last of them.
     """
 
-    timeout = 10
     STEPS = 10
-    endian_is_le = True
 
     def run_vm(self, record, shift, args, replay_path, image_path, port):
         logger = logging.getLogger('replay')
@@ -53,49 +55,11 @@ def run_vm(self, record, shift, args, replay_path, image_path, port):
         vm.launch()
         return vm
 
-    @staticmethod
-    def get_reg_le(g, reg):
-        res = g.cmd(b'p%x' % reg)
-        num = 0
-        for i in range(len(res))[-2::-2]:
-            num = 0x100 * num + int(res[i:i + 2], 16)
-        return num
-
-    @staticmethod
-    def get_reg_be(g, reg):
-        res = g.cmd(b'p%x' % reg)
-        return int(res, 16)
-
-    def get_reg(self, g, reg):
-        # value may be encoded in BE or LE order
-        if self.endian_is_le:
-            return self.get_reg_le(g, reg)
-        else:
-            return self.get_reg_be(g, reg)
-
-    def get_pc(self, g):
-        return self.get_reg(g, self.REG_PC)
-
-    def check_pc(self, g, addr):
-        pc = self.get_pc(g)
-        if pc != addr:
-            self.fail('Invalid PC (read %x instead of %x)' % (pc, addr))
-
-    @staticmethod
-    def gdb_step(g):
-        g.cmd(b's', b'T05thread:01;')
-
-    @staticmethod
-    def gdb_bstep(g):
-        g.cmd(b'bs', b'T05thread:01;')
-
     @staticmethod
     def vm_get_icount(vm):
         return vm.qmp('query-replay')['return']['icount']
 
     def reverse_debugging(self, shift=7, args=None):
-        from avocado.utils import gdb
-
         logger = logging.getLogger('replay')
 
         # create qcow2 for snapshots
@@ -124,68 +88,99 @@ def reverse_debugging(self, shift=7, args=None):
         with Ports() as ports:
             port = ports.find_free_port()
             vm = self.run_vm(False, shift, args, replay_path, image_path, port)
-        logger.info('connecting to gdbstub')
-        g = gdb.GDBRemote('127.0.0.1', port, False, False)
-        g.connect()
-        r = g.cmd(b'qSupported')
-        if b'qXfer:features:read+' in r:
-            g.cmd(b'qXfer:features:read:target.xml:0,ffb')
-        if b'ReverseStep+' not in r:
+
+        try:
+            logger.info('Connecting to gdbstub...')
+            self.reverse_debugging_run(vm, port, last_icount)
+            logger.info('Test passed.')
+        except GdbTimeoutError:
+            self.fail("Connection to gdbstub timeouted...")
+
+    @skipIfMissingEnv("QEMU_TEST_GDB")
+    def reverse_debugging_run(self, vm, port, last_icount):
+        logger = logging.getLogger('replay')
+
+        gdb_cmd = os.getenv('QEMU_TEST_GDB')
+        gdb = GDB(gdb_cmd)
+
+        gdb.cli("set debug remote 1")
+
+        c = gdb.cli(f"target remote localhost:{port}").get_console()
+        if not f"Remote debugging using localhost:{port}" in c:
+            self.fail("Could not connect to gdbstub!")
+
+        # Remote debug messages are in 'log' payloads.
+        r = gdb.get_log()
+        if 'ReverseStep+' not in r:
             self.fail('Reverse step is not supported by QEMU')
-        if b'ReverseContinue+' not in r:
+        if 'ReverseContinue+' not in r:
             self.fail('Reverse continue is not supported by QEMU')
 
+        gdb.cli("set debug remote 0")
+
         logger.info('stepping forward')
         steps = []
         # record first instruction addresses
         for _ in range(self.STEPS):
-            pc = self.get_pc(g)
+            pc = gdb.cli("print $pc").get_addr()
             logger.info('saving position %x' % pc)
             steps.append(pc)
-            self.gdb_step(g)
+            gdb.cli("stepi")
 
         # visit the recorded instruction in reverse order
         logger.info('stepping backward')
         for addr in steps[::-1]:
-            self.gdb_bstep(g)
-            self.check_pc(g, addr)
             logger.info('found position %x' % addr)
+            gdb.cli("reverse-stepi")
+            pc = gdb.cli("print $pc").get_addr()
+            if pc != addr:
+                logger.info('Invalid PC (read %x instead of %x)' % (pc, addr))
+                self.fail('Reverse stepping failed!')
 
         # visit the recorded instruction in forward order
         logger.info('stepping forward')
         for addr in steps:
-            self.check_pc(g, addr)
-            self.gdb_step(g)
             logger.info('found position %x' % addr)
+            pc = gdb.cli("print $pc").get_addr()
+            if pc != addr:
+                logger.info('Invalid PC (read %x instead of %x)' % (pc, addr))
+                self.fail('Forward stepping failed!')
+            gdb.cli("stepi")
 
         # set breakpoints for the instructions just stepped over
         logger.info('setting breakpoints')
         for addr in steps:
-            # hardware breakpoint at addr with len=1
-            g.cmd(b'Z1,%x,1' % addr, b'OK')
+            gdb.cli(f"break *{hex(addr)}")
 
         # this may hit a breakpoint if first instructions are executed
         # again
         logger.info('continuing execution')
         vm.qmp('replay-break', icount=last_icount - 1)
         # continue - will return after pausing
-        # This could stop at the end and get a T02 return, or by
-        # re-executing one of the breakpoints and get a T05 return.
-        g.cmd(b'c')
+        # This can stop at the end of the replay-break and gdb gets a SIGINT,
+        # or by re-executing one of the breakpoints and gdb stops at a
+        # breakpoint.
+        gdb.cli("continue")
+
+        pc = gdb.cli("print $pc").get_addr()
         if self.vm_get_icount(vm) == last_icount - 1:
             logger.info('reached the end (icount %s)' % (last_icount - 1))
         else:
             logger.info('hit a breakpoint again at %x (icount %s)' %
-                        (self.get_pc(g), self.vm_get_icount(vm)))
+                        (pc, self.vm_get_icount(vm)))
 
         logger.info('running reverse continue to reach %x' % steps[-1])
         # reverse continue - will return after stopping at the breakpoint
-        g.cmd(b'bc', b'T05thread:01;')
+        gdb.cli("reverse-continue")
 
         # assume that none of the first instructions is executed again
         # breaking the order of the breakpoints
-        self.check_pc(g, steps[-1])
+        pc = gdb.cli("print $pc").get_addr()
+        if pc != steps[-1]:
+            self.fail("'reverse-continue' did not hit the first PC in reverse order!")
+
         logger.info('successfully reached %x' % steps[-1])
 
         logger.info('exiting gdb and qemu')
+        gdb.exit()
         vm.shutdown()
-- 
2.34.1

Re: [PATCH v4 8/9] tests/functional: Adapt reverse_debugging to run w/o Avocado

Posted by Thomas Huth 2 days, 7 hours ago

On 26/09/2025 07.15, Gustavo Romero wrote:
> This commit removes Avocado as a dependency for running the
> reverse_debugging test.
> 
> The main benefit, beyond eliminating an extra dependency, is that there
> is no longer any need to handle GDB packets manually. This removes the
> need for ad-hoc functions dealing with endianness and arch-specific
> register numbers, making the test easier to read. The timeout variable
> is also removed, since Meson now manages timeouts automatically.
> 
> reverse_debugging now uses the pygdbmi module to interact with GDB, if
> it's available in the test environment, otherwise the test is skipped.
> GDB is detect via the QEMU_TEST_GDB env. variable.
> 
> This commit also significantly improves the output for the test and
> now prints all the GDB commands used in sequence. It also adds
> some clarifications to existing comments, for example, clarifying that
> once the replay-break is reached, a SIGINT is captured in GDB.
> 
> reverse_debugging is kept "skipped" for aarch64, ppc64, and x86_64, so
> won't run unless QEMU_TEST_FLAKY_TESTS=1 is set in the test environment,
> before running 'make check-functional' or 'meson test [...]'.
> 
> Signed-off-by: Gustavo Romero <gustavo.romero@linaro.org>
> ---
...
> @@ -53,49 +55,11 @@ def run_vm(self, record, shift, args, replay_path, image_path, port):
>           vm.launch()
>           return vm
>   
> -    @staticmethod
> -    def get_reg_le(g, reg):
> -        res = g.cmd(b'p%x' % reg)
> -        num = 0
> -        for i in range(len(res))[-2::-2]:
> -            num = 0x100 * num + int(res[i:i + 2], 16)
> -        return num
> -
> -    @staticmethod
> -    def get_reg_be(g, reg):
> -        res = g.cmd(b'p%x' % reg)
> -        return int(res, 16)
> -
> -    def get_reg(self, g, reg):
> -        # value may be encoded in BE or LE order
> -        if self.endian_is_le:
> -            return self.get_reg_le(g, reg)
> -        else:
> -            return self.get_reg_be(g, reg)
> -
> -    def get_pc(self, g):
> -        return self.get_reg(g, self.REG_PC)
> -
> -    def check_pc(self, g, addr):
> -        pc = self.get_pc(g)
> -        if pc != addr:
> -            self.fail('Invalid PC (read %x instead of %x)' % (pc, addr))

I think it would make sense to keep wrapper functions get_pc() and 
check_pc(), since that functionality is still used in a bunch of places 
(e.g. "gdb.cli("print $pc").get_addr()" is used a couple of times).

> @@ -124,68 +88,99 @@ def reverse_debugging(self, shift=7, args=None):
>           with Ports() as ports:
>               port = ports.find_free_port()
>               vm = self.run_vm(False, shift, args, replay_path, image_path, port)
> -        logger.info('connecting to gdbstub')
> -        g = gdb.GDBRemote('127.0.0.1', port, False, False)
> -        g.connect()
> -        r = g.cmd(b'qSupported')
> -        if b'qXfer:features:read+' in r:
> -            g.cmd(b'qXfer:features:read:target.xml:0,ffb')
> -        if b'ReverseStep+' not in r:
> +
> +        try:
> +            logger.info('Connecting to gdbstub...')
> +            self.reverse_debugging_run(vm, port, last_icount)
> +            logger.info('Test passed.')
> +        except GdbTimeoutError:
> +            self.fail("Connection to gdbstub timeouted...")

I'm not a native English speaker, but I think "timeout" is not a verb. So 
maybe better: "Timeout while connecting to gdbstub" or something similar?

> +    @skipIfMissingEnv("QEMU_TEST_GDB")

Somehow this SKIP is always triggered on my system, even if I correctly 
pointed "configure" to the right GDB with the "--gdb" option ... not sure 
why this happens, but I'll try to find out...

  Thomas

Re: [PATCH v4 8/9] tests/functional: Adapt reverse_debugging to run w/o Avocado

Posted by Gustavo Romero 1 day, 23 hours ago

Hi Thomas,

On 9/26/25 05:44, Thomas Huth wrote:
> On 26/09/2025 07.15, Gustavo Romero wrote:
>> This commit removes Avocado as a dependency for running the
>> reverse_debugging test.
>>
>> The main benefit, beyond eliminating an extra dependency, is that there
>> is no longer any need to handle GDB packets manually. This removes the
>> need for ad-hoc functions dealing with endianness and arch-specific
>> register numbers, making the test easier to read. The timeout variable
>> is also removed, since Meson now manages timeouts automatically.
>>
>> reverse_debugging now uses the pygdbmi module to interact with GDB, if
>> it's available in the test environment, otherwise the test is skipped.
>> GDB is detect via the QEMU_TEST_GDB env. variable.
>>
>> This commit also significantly improves the output for the test and
>> now prints all the GDB commands used in sequence. It also adds
>> some clarifications to existing comments, for example, clarifying that
>> once the replay-break is reached, a SIGINT is captured in GDB.
>>
>> reverse_debugging is kept "skipped" for aarch64, ppc64, and x86_64, so
>> won't run unless QEMU_TEST_FLAKY_TESTS=1 is set in the test environment,
>> before running 'make check-functional' or 'meson test [...]'.
>>
>> Signed-off-by: Gustavo Romero <gustavo.romero@linaro.org>
>> ---
> ...
>> @@ -53,49 +55,11 @@ def run_vm(self, record, shift, args, replay_path, image_path, port):
>>           vm.launch()
>>           return vm
>> -    @staticmethod
>> -    def get_reg_le(g, reg):
>> -        res = g.cmd(b'p%x' % reg)
>> -        num = 0
>> -        for i in range(len(res))[-2::-2]:
>> -            num = 0x100 * num + int(res[i:i + 2], 16)
>> -        return num
>> -
>> -    @staticmethod
>> -    def get_reg_be(g, reg):
>> -        res = g.cmd(b'p%x' % reg)
>> -        return int(res, 16)
>> -
>> -    def get_reg(self, g, reg):
>> -        # value may be encoded in BE or LE order
>> -        if self.endian_is_le:
>> -            return self.get_reg_le(g, reg)
>> -        else:
>> -            return self.get_reg_be(g, reg)
>> -
>> -    def get_pc(self, g):
>> -        return self.get_reg(g, self.REG_PC)
>> -
>> -    def check_pc(self, g, addr):
>> -        pc = self.get_pc(g)
>> -        if pc != addr:
>> -            self.fail('Invalid PC (read %x instead of %x)' % (pc, addr))
> 
> I think it would make sense to keep wrapper functions get_pc() and check_pc(), since that functionality is still used in a bunch of places (e.g. "gdb.cli("print $pc").get_addr()" is used a couple of times).

Yeah, I considered that, but I really think that using the
wrapper functions doesn't add to this test (as in the original test).
First because I like reading the test code and easily map it to
its output and, second, the original test that used check_pc() was
actually failing with this "Invalid PC ... " message in all the cases,
which is not informative. In my version, because I check PC in place,
I also fail with a specific message for each case, like "Forward stepping failed¨,
"Reverse stepping failed", "reverse-continue", etc.

If you don't mind I'd like to keep the test this way.


>> @@ -124,68 +88,99 @@ def reverse_debugging(self, shift=7, args=None):
>>           with Ports() as ports:
>>               port = ports.find_free_port()
>>               vm = self.run_vm(False, shift, args, replay_path, image_path, port)
>> -        logger.info('connecting to gdbstub')
>> -        g = gdb.GDBRemote('127.0.0.1', port, False, False)
>> -        g.connect()
>> -        r = g.cmd(b'qSupported')
>> -        if b'qXfer:features:read+' in r:
>> -            g.cmd(b'qXfer:features:read:target.xml:0,ffb')
>> -        if b'ReverseStep+' not in r:
>> +
>> +        try:
>> +            logger.info('Connecting to gdbstub...')
>> +            self.reverse_debugging_run(vm, port, last_icount)
>> +            logger.info('Test passed.')
>> +        except GdbTimeoutError:
>> +            self.fail("Connection to gdbstub timeouted...")
> 
> I'm not a native English speaker, but I think "timeout" is not a verb. So maybe better: "Timeout while connecting to gdbstub" or something similar?
> 
>> +    @skipIfMissingEnv("QEMU_TEST_GDB")
> 
> Somehow this SKIP is always triggered on my system, even if I correctly pointed "configure" to the right GDB with the "--gdb" option ... not sure why this happens, but I'll try to find out...

hmm maybe you could play with the env. vars by running:

$ ./pyvenv/bin/meson test  --verbose --no-rebuild -t 1 --setup thorough  --suite func-thorough  func-aarch64-reverse_debug

which has "--verbose" so you can see and use the "raw" command containing the env. variables, e.g.:

gromero@gromero0:/mnt/git/qemu_/build$ G_TEST_SLOW=1 QEMU_TEST_QEMU_IMG=/mnt/git/qemu_/build/qemu-img ASAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1 MESON_TEST_ITERATION=1 RUST_BACKTRACE=1 MALLOC_PERTURB_=186 SPEED=thorough LD_LIBRARY_PATH=/mnt/git/qemu_/build/contrib/plugins:/mnt/git/qemu_/build/tests/tcg/plugins MSAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1 QEMU_TEST_QEMU_BINARY=/mnt/git/qemu_/build/qemu-system-aarch64 QEMU_BUILD_ROOT=/mnt/git/qemu_/build UBSAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1 PYTHONPATH=/mnt/git/qemu_/python:/mnt/git/qemu_/tests/functional /mnt/git/qemu_/build/pyvenv/bin/python3 /mnt/git/qemu_/tests/functional/aarch64/test_reverse_debug.py
TAP version 13
ok 1 test_reverse_debug.ReverseDebugging_AArch64.test_aarch64_virt # SKIP Missing env var(s): QEMU_TEST_GDB
1..1
gromero@gromero0:/mnt/git/qemu_/build$ echo $QEMU_TEST_GDB

gromero@gromero0:/mnt/git/qemu_/build$ which gdb-multiarch
/usr/bin/gdb-multiarch
gromero@gromero0:/mnt/git/qemu_/build$ export QEMU_TEST_GDB=/usr/bin/gdb-multiarch
gromero@gromero0:/mnt/git/qemu_/build$ G_TEST_SLOW=1 QEMU_TEST_QEMU_IMG=/mnt/git/qemu_/build/qemu-img ASAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1 MESON_TEST_ITERATION=1 RUST_BACKTRACE=1 MALLOC_PERTURB_=186 SPEED=thorough LD_LIBRARY_PATH=/mnt/git/qemu_/build/contrib/plugins:/mnt/git/qemu_/build/tests/tcg/plugins MSAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1 QEMU_TEST_QEMU_BINARY=/mnt/git/qemu_/build/qemu-system-aarch64 QEMU_BUILD_ROOT=/mnt/git/qemu_/build UBSAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1 PYTHONPATH=/mnt/git/qemu_/python:/mnt/git/qemu_/tests/functional /mnt/git/qemu_/build/pyvenv/bin/python3 /mnt/git/qemu_/tests/functional/aarch64/test_reverse_debug.py
TAP version 13
# $ set debug remote 1
# $ target remote localhost:64512
# Remote debugging using localhost:64512
# 0x0000000040000000 in ?? ()
# $ set debug remote 0
# $ print $pc
# $1 = (void (*)()) 0x40000000
# $ stepi
# 0x0000000040000004 in ?? ()
# $ print $pc
# $2 = (void (*)()) 0x40000004
# $ stepi
# 0x0000000040000008 in ?? ()
[...]


Let me know what you find :) Thanks!


Cheers,
Gustavo