[v3] Record/replay acceptance tests

[PATCH v3 11/11] tests/acceptance: Linux boot test for record/replay

Posted by Pavel Dovgalyuk 4 years, 10 months ago

This patch adds a test for record/replay, which boots a Linux
image from the disk and interacts with the network.
The idea and code of this test are borrowed from boot_linux.py.
However, record/replay currently works only for x86_64,
so the other tests were excluded.

Each test consists of the following phases:
 - downloading the disk image
 - recording the execution
 - replaying the execution

Replay does not validate the output, but waits until QEMU
finishes the execution. This is reasonable, because
QEMU usually hangs when replay goes wrong.

Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
---
 0 files changed

diff --git a/MAINTAINERS b/MAINTAINERS
index e9a9ce4f66..97f066a9b2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2498,6 +2498,7 @@ F: include/sysemu/replay.h
 F: docs/replay.txt
 F: stubs/replay.c
 F: tests/acceptance/replay_kernel.py
+F: tests/acceptance/replay_linux.py
 
 IOVA Tree
 M: Peter Xu <peterx@redhat.com>
diff --git a/tests/acceptance/replay_linux.py b/tests/acceptance/replay_linux.py
new file mode 100644
index 0000000000..328b03bb33
--- /dev/null
+++ b/tests/acceptance/replay_linux.py
@@ -0,0 +1,114 @@
+# Record/replay test that boots a complete Linux system via a cloud image
+#
+# Copyright (c) 2020 ISP RAS
+#
+# Author:
+#  Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+import os
+import logging
+import time
+
+from avocado.utils import cloudinit
+from avocado.utils import network
+from avocado.utils import vmimage
+from avocado.utils import datadrainer
+from avocado.utils.path import find_command
+from boot_linux import BootLinuxBase
+
+class ReplayLinux(BootLinuxBase):
+    """
+    Boots a Linux system, checking for a successful initialization
+    """
+
+    timeout = 1800
+    chksum = None
+    hdd = 'ide-hd'
+    cd = 'ide-cd'
+    bus = 'ide'
+
+    def setUp(self):
+        super(ReplayLinux, self).setUp()
+        self.boot_path = self.download_boot()
+        self.cloudinit_path = self.download_cloudinit()
+
+    def vm_add_disk(self, vm, path, id, device):
+        bus_string = ''
+        if self.bus:
+            bus_string = ',bus=%s.%d' % (self.bus, id,)
+        vm.add_args('-drive', 'file=%s,snapshot,id=disk%s,if=none' % (path, id))
+        vm.add_args('-drive',
+            'driver=blkreplay,id=disk%s-rr,if=none,image=disk%s' % (id, id))
+        vm.add_args('-device',
+            '%s,drive=disk%s-rr%s' % (device, id, bus_string))
+
+    def launch_and_wait(self, record, args, shift):
+        vm = self.get_vm()
+        vm.add_args('-smp', '1')
+        vm.add_args('-m', '1024')
+        vm.add_args('-object', 'filter-replay,id=replay,netdev=hub0port0')
+        if args:
+            vm.add_args(*args)
+        self.vm_add_disk(vm, self.boot_path, 0, self.hdd)
+        self.vm_add_disk(vm, self.cloudinit_path, 1, self.cd)
+        logger = logging.getLogger('replay')
+        if record:
+            logger.info('recording the execution...')
+            mode = 'record'
+        else:
+            logger.info('replaying the execution...')
+            mode = 'replay'
+        replay_path = os.path.join(self.workdir, 'replay.bin')
+        vm.add_args('-icount', 'shift=%s,rr=%s,rrfile=%s' %
+                    (shift, mode, replay_path))
+
+        start_time = time.time()
+
+        vm.set_console()
+        vm.launch()
+        console_drainer = datadrainer.LineLogger(vm.console_socket.fileno(),
+                                    logger=self.log.getChild('console'),
+                                    stop_check=(lambda : not vm.is_running()))
+        console_drainer.start()
+        if record:
+            cloudinit.wait_for_phone_home(('0.0.0.0', self.phone_home_port),
+                                          self.name)
+            vm.shutdown()
+            logger.info('finished the recording with log size %s bytes'
+                % os.path.getsize(replay_path))
+        else:
+            vm.wait()
+            logger.info('successfully fihished the replay')
+        elapsed = time.time() - start_time
+        logger.info('elapsed time %.2f sec' % elapsed)
+        return elapsed
+
+    def run_rr(self, args=None, shift=7):
+        t1 = self.launch_and_wait(True, args, shift)
+        t2 = self.launch_and_wait(False, args, shift)
+        logger = logging.getLogger('replay')
+        logger.info('replay overhead {:.2%}'.format(t2 / t1 - 1))
+
+class ReplayLinuxX8664(ReplayLinux):
+    """
+    :avocado: tags=arch:x86_64
+    """
+
+    chksum = 'e3c1b309d9203604922d6e255c2c5d098a309c2d46215d8fc026954f3c5c27a0'
+
+    def test_pc_i440fx(self):
+        """
+        :avocado: tags=machine:pc
+        :avocado: tags=accel:tcg
+        """
+        self.run_rr(shift=1)
+
+    def test_pc_q35(self):
+        """
+        :avocado: tags=machine:q35
+        :avocado: tags=accel:tcg
+        """
+        self.run_rr(shift=3)

Re: [PATCH v3 11/11] tests/acceptance: Linux boot test for record/replay

Posted by Philippe Mathieu-Daudé 4 years, 9 months ago

Hi Pavel,

On 5/29/20 9:05 AM, Pavel Dovgalyuk wrote:
> This patch adds a test for record/replay, which boots Linux
> image from the disk and interacts with the network.
> The idea and code of this test is borrowed from boot_linux.py
> However, currently record/replay works only for x86_64,
> therefore other tests were excluded.
> 
> Each test consists of the following phases:
>  - downloading the disk image
>  - recording the execution
>  - replaying the execution
> 
> Replay does not validates the output, but waits until QEMU
> finishes the execution. This is reasonable, because
> QEMU usually hangs when replay goes wrong.
> 
> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
> ---
>  0 files changed
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index e9a9ce4f66..97f066a9b2 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -2498,6 +2498,7 @@ F: include/sysemu/replay.h
>  F: docs/replay.txt
>  F: stubs/replay.c
>  F: tests/acceptance/replay_kernel.py
> +F: tests/acceptance/replay_linux.py
>  
>  IOVA Tree
>  M: Peter Xu <peterx@redhat.com>
> diff --git a/tests/acceptance/replay_linux.py b/tests/acceptance/replay_linux.py
> new file mode 100644
> index 0000000000..328b03bb33
> --- /dev/null
> +++ b/tests/acceptance/replay_linux.py
> @@ -0,0 +1,114 @@
> +# Record/replay test that boots a complete Linux system via a cloud image
> +#
> +# Copyright (c) 2020 ISP RAS
> +#
> +# Author:
> +#  Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
> +#
> +# This work is licensed under the terms of the GNU GPL, version 2 or
> +# later.  See the COPYING file in the top-level directory.
> +
> +import os
> +import logging
> +import time
> +
> +from avocado.utils import cloudinit
> +from avocado.utils import network
> +from avocado.utils import vmimage
> +from avocado.utils import datadrainer
> +from avocado.utils.path import find_command
> +from boot_linux import BootLinuxBase
> +
> +class ReplayLinux(BootLinuxBase):
> +    """
> +    Boots a Linux system, checking for a successful initialization
> +    """
> +
> +    timeout = 1800

This test works, but is taking way too long for the default
test suite.

I said in the cover letter I'd use ...:

@skipIf(os.getenv('CONTINUOUS_INTEGRATION'), 'Running on Travis-CI')

... to skip on Travis, but I'll change that by declaring this test
'slow'. We could use 'tags=slowness:high' but then we need to
modify the 'check-acceptance' default rule to skip tests
matching the tag.

Willian, Cleber, any clever idea?

> +    chksum = None
> +    hdd = 'ide-hd'
> +    cd = 'ide-cd'
> +    bus = 'ide'
> +
> +    def setUp(self):
> +        super(ReplayLinux, self).setUp()
> +        self.boot_path = self.download_boot()
> +        self.cloudinit_path = self.download_cloudinit()
> +
> +    def vm_add_disk(self, vm, path, id, device):
> +        bus_string = ''
> +        if self.bus:
> +            bus_string = ',bus=%s.%d' % (self.bus, id,)
> +        vm.add_args('-drive', 'file=%s,snapshot,id=disk%s,if=none' % (path, id))
> +        vm.add_args('-drive',
> +            'driver=blkreplay,id=disk%s-rr,if=none,image=disk%s' % (id, id))
> +        vm.add_args('-device',
> +            '%s,drive=disk%s-rr%s' % (device, id, bus_string))
> +
> +    def launch_and_wait(self, record, args, shift):
> +        vm = self.get_vm()
> +        vm.add_args('-smp', '1')
> +        vm.add_args('-m', '1024')
> +        vm.add_args('-object', 'filter-replay,id=replay,netdev=hub0port0')
> +        if args:
> +            vm.add_args(*args)
> +        self.vm_add_disk(vm, self.boot_path, 0, self.hdd)
> +        self.vm_add_disk(vm, self.cloudinit_path, 1, self.cd)
> +        logger = logging.getLogger('replay')
> +        if record:
> +            logger.info('recording the execution...')
> +            mode = 'record'
> +        else:
> +            logger.info('replaying the execution...')
> +            mode = 'replay'
> +        replay_path = os.path.join(self.workdir, 'replay.bin')
> +        vm.add_args('-icount', 'shift=%s,rr=%s,rrfile=%s' %
> +                    (shift, mode, replay_path))
> +
> +        start_time = time.time()
> +
> +        vm.set_console()
> +        vm.launch()
> +        console_drainer = datadrainer.LineLogger(vm.console_socket.fileno(),
> +                                    logger=self.log.getChild('console'),
> +                                    stop_check=(lambda : not vm.is_running()))
> +        console_drainer.start()
> +        if record:
> +            cloudinit.wait_for_phone_home(('0.0.0.0', self.phone_home_port),
> +                                          self.name)
> +            vm.shutdown()
> +            logger.info('finished the recording with log size %s bytes'
> +                % os.path.getsize(replay_path))
> +        else:
> +            vm.wait()
> +            logger.info('successfully fihished the replay')
> +        elapsed = time.time() - start_time
> +        logger.info('elapsed time %.2f sec' % elapsed)
> +        return elapsed
> +
> +    def run_rr(self, args=None, shift=7):
> +        t1 = self.launch_and_wait(True, args, shift)
> +        t2 = self.launch_and_wait(False, args, shift)
> +        logger = logging.getLogger('replay')
> +        logger.info('replay overhead {:.2%}'.format(t2 / t1 - 1))
> +
> +class ReplayLinuxX8664(ReplayLinux):
> +    """
> +    :avocado: tags=arch:x86_64
> +    """
> +
> +    chksum = 'e3c1b309d9203604922d6e255c2c5d098a309c2d46215d8fc026954f3c5c27a0'
> +
> +    def test_pc_i440fx(self):
> +        """
> +        :avocado: tags=machine:pc
> +        :avocado: tags=accel:tcg
> +        """
> +        self.run_rr(shift=1)
> +
> +    def test_pc_q35(self):
> +        """
> +        :avocado: tags=machine:q35
> +        :avocado: tags=accel:tcg
> +        """
> +        self.run_rr(shift=3)
>

Re: [PATCH v3 11/11] tests/acceptance: Linux boot test for record/replay

Posted by Philippe Mathieu-Daudé 4 years, 9 months ago

On 6/22/20 10:03 AM, Philippe Mathieu-Daudé wrote:
> Hi Pavel,
> 
> On 5/29/20 9:05 AM, Pavel Dovgalyuk wrote:
>> This patch adds a test for record/replay, which boots Linux
>> image from the disk and interacts with the network.
>> The idea and code of this test is borrowed from boot_linux.py
>> However, currently record/replay works only for x86_64,
>> therefore other tests were excluded.
>>
>> Each test consists of the following phases:
>>  - downloading the disk image
>>  - recording the execution
>>  - replaying the execution
>>
>> Replay does not validates the output, but waits until QEMU
>> finishes the execution. This is reasonable, because
>> QEMU usually hangs when replay goes wrong.
>>
>> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>> ---
>>  0 files changed
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index e9a9ce4f66..97f066a9b2 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -2498,6 +2498,7 @@ F: include/sysemu/replay.h
>>  F: docs/replay.txt
>>  F: stubs/replay.c
>>  F: tests/acceptance/replay_kernel.py
>> +F: tests/acceptance/replay_linux.py
>>  
>>  IOVA Tree
>>  M: Peter Xu <peterx@redhat.com>
>> diff --git a/tests/acceptance/replay_linux.py b/tests/acceptance/replay_linux.py
>> new file mode 100644
>> index 0000000000..328b03bb33
>> --- /dev/null
>> +++ b/tests/acceptance/replay_linux.py
>> @@ -0,0 +1,114 @@
>> +# Record/replay test that boots a complete Linux system via a cloud image
>> +#
>> +# Copyright (c) 2020 ISP RAS
>> +#
>> +# Author:
>> +#  Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>> +#
>> +# This work is licensed under the terms of the GNU GPL, version 2 or
>> +# later.  See the COPYING file in the top-level directory.
>> +
>> +import os
>> +import logging
>> +import time
>> +
>> +from avocado.utils import cloudinit
>> +from avocado.utils import network
>> +from avocado.utils import vmimage
>> +from avocado.utils import datadrainer
>> +from avocado.utils.path import find_command
>> +from boot_linux import BootLinuxBase
>> +
>> +class ReplayLinux(BootLinuxBase):
>> +    """
>> +    Boots a Linux system, checking for a successful initialization
>> +    """
>> +
>> +    timeout = 1800
> 
> This test works, but is taking way too long for the default
> test suite.
> 
> I said in the cover I'd use ...:
> 
> @skipIf(os.getenv('CONTINUOUS_INTEGRATION'), 'Running on Travis-CI')
> 
> ... to skip on Travis, but I'll change by declaring this test
> 'slow'. We could use 'tags=slowness:high' but then we need to
> modify the 'check-acceptance' default rule to skip tests
> matching the tag.
> 
> Willian, Cleber, any clever idea?

Meanwhile I'm taking this patch out of the queue, so it
won't delay the integration of the other tests.

> 
>> +    chksum = None
>> +    hdd = 'ide-hd'
>> +    cd = 'ide-cd'
>> +    bus = 'ide'
>> +
>> +    def setUp(self):
>> +        super(ReplayLinux, self).setUp()
>> +        self.boot_path = self.download_boot()
>> +        self.cloudinit_path = self.download_cloudinit()
>> +
>> +    def vm_add_disk(self, vm, path, id, device):
>> +        bus_string = ''
>> +        if self.bus:
>> +            bus_string = ',bus=%s.%d' % (self.bus, id,)
>> +        vm.add_args('-drive', 'file=%s,snapshot,id=disk%s,if=none' % (path, id))
>> +        vm.add_args('-drive',
>> +            'driver=blkreplay,id=disk%s-rr,if=none,image=disk%s' % (id, id))
>> +        vm.add_args('-device',
>> +            '%s,drive=disk%s-rr%s' % (device, id, bus_string))
>> +
>> +    def launch_and_wait(self, record, args, shift):
>> +        vm = self.get_vm()
>> +        vm.add_args('-smp', '1')
>> +        vm.add_args('-m', '1024')
>> +        vm.add_args('-object', 'filter-replay,id=replay,netdev=hub0port0')
>> +        if args:
>> +            vm.add_args(*args)
>> +        self.vm_add_disk(vm, self.boot_path, 0, self.hdd)
>> +        self.vm_add_disk(vm, self.cloudinit_path, 1, self.cd)
>> +        logger = logging.getLogger('replay')
>> +        if record:
>> +            logger.info('recording the execution...')
>> +            mode = 'record'
>> +        else:
>> +            logger.info('replaying the execution...')
>> +            mode = 'replay'
>> +        replay_path = os.path.join(self.workdir, 'replay.bin')
>> +        vm.add_args('-icount', 'shift=%s,rr=%s,rrfile=%s' %
>> +                    (shift, mode, replay_path))
>> +
>> +        start_time = time.time()
>> +
>> +        vm.set_console()
>> +        vm.launch()
>> +        console_drainer = datadrainer.LineLogger(vm.console_socket.fileno(),
>> +                                    logger=self.log.getChild('console'),
>> +                                    stop_check=(lambda : not vm.is_running()))
>> +        console_drainer.start()
>> +        if record:
>> +            cloudinit.wait_for_phone_home(('0.0.0.0', self.phone_home_port),
>> +                                          self.name)
>> +            vm.shutdown()
>> +            logger.info('finished the recording with log size %s bytes'
>> +                % os.path.getsize(replay_path))
>> +        else:
>> +            vm.wait()
>> +            logger.info('successfully fihished the replay')
>> +        elapsed = time.time() - start_time
>> +        logger.info('elapsed time %.2f sec' % elapsed)
>> +        return elapsed
>> +
>> +    def run_rr(self, args=None, shift=7):
>> +        t1 = self.launch_and_wait(True, args, shift)
>> +        t2 = self.launch_and_wait(False, args, shift)
>> +        logger = logging.getLogger('replay')
>> +        logger.info('replay overhead {:.2%}'.format(t2 / t1 - 1))
>> +
>> +class ReplayLinuxX8664(ReplayLinux):
>> +    """
>> +    :avocado: tags=arch:x86_64
>> +    """
>> +
>> +    chksum = 'e3c1b309d9203604922d6e255c2c5d098a309c2d46215d8fc026954f3c5c27a0'
>> +
>> +    def test_pc_i440fx(self):
>> +        """
>> +        :avocado: tags=machine:pc
>> +        :avocado: tags=accel:tcg
>> +        """
>> +        self.run_rr(shift=1)
>> +
>> +    def test_pc_q35(self):
>> +        """
>> +        :avocado: tags=machine:q35
>> +        :avocado: tags=accel:tcg
>> +        """
>> +        self.run_rr(shift=3)
>>
>