diff mbox series

[v7,14/14] tests/acceptance: add reverse debugging test

Message ID 160174524678.12451.13258942849173670277.stgit@pasha-ThinkPad-X280
State New
Headers show
Series Reverse debugging | expand

Commit Message

Pavel Dovgalyuk Oct. 3, 2020, 5:14 p.m. UTC
From: Pavel Dovgalyuk <Pavel.Dovgaluk@gmail.com>

This is a test for GDB reverse debugging commands: reverse step and reverse continue.
Every test in this suite consists of two phases: record and replay.
Recording saves the execution of some instructions and makes an initial
VM snapshot to allow reverse execution.
Replay saves the order of the first instructions and then checks that they
are executed backwards in the correct order.
After that the execution is replayed to the end, and reverse continue
command is checked by setting several breakpoints, and asserting
that the execution is stopped at the last of them.

Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Willian Rampazzo <willianr@redhat.com>

--

v5:
 - disabled (as some other tests) when running on gitlab
   due to the unidentified timeout problem
---
 MAINTAINERS                           |    1 
 tests/acceptance/reverse_debugging.py |  208 +++++++++++++++++++++++++++++++++
 2 files changed, 209 insertions(+)
 create mode 100644 tests/acceptance/reverse_debugging.py

Comments

Cleber Rosa Oct. 6, 2020, 1:36 p.m. UTC | #1
On Sat, Oct 03, 2020 at 08:14:06PM +0300, Pavel Dovgalyuk wrote:
> From: Pavel Dovgalyuk <Pavel.Dovgaluk@gmail.com>
> 
> This is a test for GDB reverse debugging commands: reverse step and reverse continue.
> Every test in this suite consists of two phases: record and replay.
> Recording saves the execution of some instructions and makes an initial
> VM snapshot to allow reverse execution.
> Replay saves the order of the first instructions and then checks that they
> are executed backwards in the correct order.
> After that the execution is replayed to the end, and reverse continue
> command is checked by setting several breakpoints, and asserting
> that the execution is stopped at the last of them.
> 
> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
> Reviewed-by: Willian Rampazzo <willianr@redhat.com>
> 
> --
> 
> v5:
>  - disabled (as some other tests) when running on gitlab
>    due to the unidentified timeout problem
> ---
>  MAINTAINERS                           |    1 
>  tests/acceptance/reverse_debugging.py |  208 +++++++++++++++++++++++++++++++++
>  2 files changed, 209 insertions(+)
>  create mode 100644 tests/acceptance/reverse_debugging.py
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index ea4fa3e481..bd3a7efb75 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -2693,6 +2693,7 @@ F: include/sysemu/replay.h
>  F: docs/replay.txt
>  F: stubs/replay.c
>  F: tests/acceptance/replay_kernel.py
> +F: tests/acceptance/reverse_debugging.py
>  F: qapi/replay.json
>  
>  IOVA Tree
> diff --git a/tests/acceptance/reverse_debugging.py b/tests/acceptance/reverse_debugging.py
> new file mode 100644
> index 0000000000..b72fdf6cdc
> --- /dev/null
> +++ b/tests/acceptance/reverse_debugging.py
> @@ -0,0 +1,208 @@
> +# Reverse debugging test
> +#
> +# Copyright (c) 2020 ISP RAS
> +#
> +# Author:
> +#  Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> +#
> +# This work is licensed under the terms of the GNU GPL, version 2 or
> +# later.  See the COPYING file in the top-level directory.
> +import os
> +import logging
> +
> +from avocado import skipIf
> +from avocado_qemu import BUILD_DIR
> +from avocado.utils import gdb
> +from avocado.utils import process
> +from avocado.utils.path import find_command
> +from boot_linux_console import LinuxKernelTest
> +
> +class ReverseDebugging(LinuxKernelTest):
> +    """
> +    Test GDB reverse debugging commands: reverse step and reverse continue.
> +    Recording saves the execution of some instructions and makes an initial
> +    VM snapshot to allow reverse execution.
> +    Replay saves the order of the first instructions and then checks that they
> +    are executed backwards in the correct order.
> +    After that the execution is replayed to the end, and reverse continue
> +    command is checked by setting several breakpoints, and asserting
> +    that the execution is stopped at the last of them.
> +    """
> +
> +    timeout = 10
> +    STEPS = 10
> +    endian_is_le = True

Have you attmepted a "be" test too?  I'm curious about why this is
defined (and used later) but it's never used as `False`.

> +
> +    def run_vm(self, record, shift, args, replay_path, image_path):
> +        logger = logging.getLogger('replay')
> +        vm = self.get_vm()
> +        vm.set_console()
> +        if record:
> +            logger.info('recording the execution...')
> +            mode = 'record'
> +        else:
> +            logger.info('replaying the execution...')
> +            mode = 'replay'
> +            vm.add_args('-s', '-S')
> +        vm.add_args('-icount', 'shift=%s,rr=%s,rrfile=%s,rrsnapshot=init' %
> +                    (shift, mode, replay_path),
> +                    '-net', 'none')
> +        vm.add_args('-drive', 'file=%s,if=none' % image_path)
> +        if args:
> +            vm.add_args(*args)
> +        vm.launch()
> +        return vm
> +
> +    @staticmethod
> +    def get_reg_le(g, reg):
> +        res = g.cmd(b'p%x' % reg)
> +        num = 0
> +        for i in range(len(res))[-2::-2]:
> +            num = 0x100 * num + int(res[i:i + 2], 16)
> +        return num
> +
> +    @staticmethod
> +    def get_reg_be(g, reg):
> +        res = g.cmd(b'p%x' % reg)
> +        return int(res, 16)
> +
> +    def get_reg(self, g, reg):
> +        # value may be encoded in BE or LE order
> +        if self.endian_is_le:
> +            return self.get_reg_le(g, reg)
> +        else:
> +            return self.get_reg_be(g, reg)
> +
> +    def get_pc(self, g):
> +        return self.get_reg(g, self.REG_PC)
> +
> +    def check_pc(self, g, addr):
> +        pc = self.get_pc(g)
> +        if pc != addr:
> +            self.fail('Invalid PC (read %x instead of %x)' % (pc, addr))
> +
> +    @staticmethod
> +    def gdb_step(g):
> +        g.cmd(b's', b'T05thread:01;')
> +
> +    @staticmethod
> +    def gdb_bstep(g):
> +        g.cmd(b'bs', b'T05thread:01;')
> +
> +    @staticmethod
> +    def vm_get_icount(vm):
> +        return vm.qmp('query-replay')['return']['icount']
> +
> +    def reverse_debugging(self, shift=7, args=None):
> +        logger = logging.getLogger('replay')
> +
> +        # create qcow2 for snapshots
> +        logger.info('creating qcow2 image for VM snapshots')
> +        image_path = os.path.join(self.workdir, 'disk.qcow2')
> +        qemu_img = os.path.join(BUILD_DIR, 'qemu-img')
> +        if not os.path.exists(qemu_img):
> +            qemu_img = find_command('qemu-img', False)
> +        if qemu_img is False:
> +            self.cancel('Could not find "qemu-img", which is required to '
> +                        'create the temporary qcow2 image')

This snippet is clearly modeled after the snippet in
`boot_linux.BootLinuxBase.download_boot()`.  I'm adding an action
item to create a generic utility:

  https://gitlab.com/cleber.gnu/qemu/-/issues/1

> +        cmd = '%s create -f qcow2 %s 128M' % (qemu_img, image_path)
> +        process.run(cmd)
> +
> +        replay_path = os.path.join(self.workdir, 'replay.bin')
> +
> +        # record the log
> +        vm = self.run_vm(True, shift, args, replay_path, image_path)
> +        while self.vm_get_icount(vm) <= self.STEPS:
> +            pass
> +        last_icount = self.vm_get_icount(vm)
> +        vm.shutdown()
> +
> +        logger.info("recorded log with %s+ steps" % last_icount)
> +
> +        # replay and run debug commands
> +        vm = self.run_vm(False, shift, args, replay_path, image_path)
> +        logger.info('connecting to gdbstub')
> +        g = gdb.GDBRemote('127.0.0.1', 1234, False, False)
> +        g.connect()
> +        r = g.cmd(b'qSupported')
> +        if b'qXfer:features:read+' in r:
> +            g.cmd(b'qXfer:features:read:target.xml:0,ffb')
> +        if b'ReverseStep+' not in r:
> +            self.fail('Reverse step is not supported by QEMU')
> +        if b'ReverseContinue+' not in r:
> +            self.fail('Reverse continue is not supported by QEMU')
> +
> +        logger.info('stepping forward')
> +        steps = []
> +        # record first instruction addresses
> +        for _ in range(self.STEPS):
> +            pc = self.get_pc(g)
> +            logger.info('saving position %x' % pc)
> +            steps.append(pc)
> +            self.gdb_step(g)

Do you think it'd make sense to have more utility methods, such as
`step()` and `bstep()` in `avocado.utils.gdb.GDBRemote` itself?

> +
> +        # visit the recorded instruction in reverse order
> +        logger.info('stepping backward')
> +        for addr in steps[::-1]:
> +            self.gdb_bstep(g)
> +            self.check_pc(g, addr)
> +            logger.info('found position %x' % addr)
> +
> +        logger.info('seeking to the end (icount %s)' % (last_icount - 1))
> +        vm.qmp('replay-break', icount=last_icount - 1)
> +        # continue - will return after pausing
> +        g.cmd(b'c', b'T02thread:01;')
> +
> +        logger.info('setting breakpoints')
> +        for addr in steps:
> +            # hardware breakpoint at addr with len=1
> +            g.cmd(b'Z1,%x,1' % addr, b'OK')
> +
> +        logger.info('running reverse continue to reach %x' % steps[-1])
> +        # reverse continue - will return after stopping at the breakpoint
> +        g.cmd(b'bc', b'T05thread:01;')
> +
> +        # assume that none of the first instructions is executed again
> +        # breaking the order of the breakpoints
> +        self.check_pc(g, steps[-1])
> +        logger.info('successfully reached %x' % steps[-1])
> +
> +        logger.info('exitting gdb and qemu')
> +        vm.shutdown()
> +
> +class ReverseDebugging_X86_64(ReverseDebugging):
> +    REG_PC = 0x10
> +    REG_CS = 0x12
> +    def get_pc(self, g):
> +        return self.get_reg_le(g, self.REG_PC) \
> +            + self.get_reg_le(g, self.REG_CS) * 0x10
> +

Nitpick: avoid backslash as line continuation, and favor parentheses,
that is:

    def get_pc(self, g):
        return (self.get_reg_le(g, self.REG_PC) +
                self.get_reg_le(g, self.REG_CS) * 0x10)

Despite the comments and questions, this LGTM:

Reviewed-by: Cleber Rosa <crosa@redhat.com>
Tested-by: Cleber Rosa <crosa@redhat.com>
Pavel Dovgalyuk Oct. 6, 2020, 3:09 p.m. UTC | #2
On 06.10.2020 16:36, Cleber Rosa wrote:
> On Sat, Oct 03, 2020 at 08:14:06PM +0300, Pavel Dovgalyuk wrote:
>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@gmail.com>
>>
>> This is a test for GDB reverse debugging commands: reverse step and reverse continue.
>> Every test in this suite consists of two phases: record and replay.
>> Recording saves the execution of some instructions and makes an initial
>> VM snapshot to allow reverse execution.
>> Replay saves the order of the first instructions and then checks that they
>> are executed backwards in the correct order.
>> After that the execution is replayed to the end, and reverse continue
>> command is checked by setting several breakpoints, and asserting
>> that the execution is stopped at the last of them.
>>
>> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
>> Reviewed-by: Willian Rampazzo <willianr@redhat.com>
>>
>> --
>>
>> v5:
>>   - disabled (as some other tests) when running on gitlab
>>     due to the unidentified timeout problem
>> ---
>>   MAINTAINERS                           |    1
>>   tests/acceptance/reverse_debugging.py |  208 +++++++++++++++++++++++++++++++++
>>   2 files changed, 209 insertions(+)
>>   create mode 100644 tests/acceptance/reverse_debugging.py
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index ea4fa3e481..bd3a7efb75 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -2693,6 +2693,7 @@ F: include/sysemu/replay.h
>>   F: docs/replay.txt
>>   F: stubs/replay.c
>>   F: tests/acceptance/replay_kernel.py
>> +F: tests/acceptance/reverse_debugging.py
>>   F: qapi/replay.json
>>   
>>   IOVA Tree
>> diff --git a/tests/acceptance/reverse_debugging.py b/tests/acceptance/reverse_debugging.py
>> new file mode 100644
>> index 0000000000..b72fdf6cdc
>> --- /dev/null
>> +++ b/tests/acceptance/reverse_debugging.py
>> @@ -0,0 +1,208 @@
>> +# Reverse debugging test
>> +#
>> +# Copyright (c) 2020 ISP RAS
>> +#
>> +# Author:
>> +#  Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>> +#
>> +# This work is licensed under the terms of the GNU GPL, version 2 or
>> +# later.  See the COPYING file in the top-level directory.
>> +import os
>> +import logging
>> +
>> +from avocado import skipIf
>> +from avocado_qemu import BUILD_DIR
>> +from avocado.utils import gdb
>> +from avocado.utils import process
>> +from avocado.utils.path import find_command
>> +from boot_linux_console import LinuxKernelTest
>> +
>> +class ReverseDebugging(LinuxKernelTest):
>> +    """
>> +    Test GDB reverse debugging commands: reverse step and reverse continue.
>> +    Recording saves the execution of some instructions and makes an initial
>> +    VM snapshot to allow reverse execution.
>> +    Replay saves the order of the first instructions and then checks that they
>> +    are executed backwards in the correct order.
>> +    After that the execution is replayed to the end, and reverse continue
>> +    command is checked by setting several breakpoints, and asserting
>> +    that the execution is stopped at the last of them.
>> +    """
>> +
>> +    timeout = 10
>> +    STEPS = 10
>> +    endian_is_le = True
> 
> Have you attmepted a "be" test too?  I'm curious about why this is
> defined (and used later) but it's never used as `False`.

It was intended to be used with PPC, but PPCs record-replay is not 
reliable enough.

> 
>> +
>> +    def run_vm(self, record, shift, args, replay_path, image_path):
>> +        logger = logging.getLogger('replay')
>> +        vm = self.get_vm()
>> +        vm.set_console()
>> +        if record:
>> +            logger.info('recording the execution...')
>> +            mode = 'record'
>> +        else:
>> +            logger.info('replaying the execution...')
>> +            mode = 'replay'
>> +            vm.add_args('-s', '-S')
>> +        vm.add_args('-icount', 'shift=%s,rr=%s,rrfile=%s,rrsnapshot=init' %
>> +                    (shift, mode, replay_path),
>> +                    '-net', 'none')
>> +        vm.add_args('-drive', 'file=%s,if=none' % image_path)
>> +        if args:
>> +            vm.add_args(*args)
>> +        vm.launch()
>> +        return vm
>> +
>> +    @staticmethod
>> +    def get_reg_le(g, reg):
>> +        res = g.cmd(b'p%x' % reg)
>> +        num = 0
>> +        for i in range(len(res))[-2::-2]:
>> +            num = 0x100 * num + int(res[i:i + 2], 16)
>> +        return num
>> +
>> +    @staticmethod
>> +    def get_reg_be(g, reg):
>> +        res = g.cmd(b'p%x' % reg)
>> +        return int(res, 16)
>> +
>> +    def get_reg(self, g, reg):
>> +        # value may be encoded in BE or LE order
>> +        if self.endian_is_le:
>> +            return self.get_reg_le(g, reg)
>> +        else:
>> +            return self.get_reg_be(g, reg)
>> +
>> +    def get_pc(self, g):
>> +        return self.get_reg(g, self.REG_PC)
>> +
>> +    def check_pc(self, g, addr):
>> +        pc = self.get_pc(g)
>> +        if pc != addr:
>> +            self.fail('Invalid PC (read %x instead of %x)' % (pc, addr))
>> +
>> +    @staticmethod
>> +    def gdb_step(g):
>> +        g.cmd(b's', b'T05thread:01;')
>> +
>> +    @staticmethod
>> +    def gdb_bstep(g):
>> +        g.cmd(b'bs', b'T05thread:01;')
>> +
>> +    @staticmethod
>> +    def vm_get_icount(vm):
>> +        return vm.qmp('query-replay')['return']['icount']
>> +
>> +    def reverse_debugging(self, shift=7, args=None):
>> +        logger = logging.getLogger('replay')
>> +
>> +        # create qcow2 for snapshots
>> +        logger.info('creating qcow2 image for VM snapshots')
>> +        image_path = os.path.join(self.workdir, 'disk.qcow2')
>> +        qemu_img = os.path.join(BUILD_DIR, 'qemu-img')
>> +        if not os.path.exists(qemu_img):
>> +            qemu_img = find_command('qemu-img', False)
>> +        if qemu_img is False:
>> +            self.cancel('Could not find "qemu-img", which is required to '
>> +                        'create the temporary qcow2 image')
> 
> This snippet is clearly modeled after the snippet in
> `boot_linux.BootLinuxBase.download_boot()`.  I'm adding an action
> item to create a generic utility:
> 
>    https://gitlab.com/cleber.gnu/qemu/-/issues/1
> 
>> +        cmd = '%s create -f qcow2 %s 128M' % (qemu_img, image_path)
>> +        process.run(cmd)
>> +
>> +        replay_path = os.path.join(self.workdir, 'replay.bin')
>> +
>> +        # record the log
>> +        vm = self.run_vm(True, shift, args, replay_path, image_path)
>> +        while self.vm_get_icount(vm) <= self.STEPS:
>> +            pass
>> +        last_icount = self.vm_get_icount(vm)
>> +        vm.shutdown()
>> +
>> +        logger.info("recorded log with %s+ steps" % last_icount)
>> +
>> +        # replay and run debug commands
>> +        vm = self.run_vm(False, shift, args, replay_path, image_path)
>> +        logger.info('connecting to gdbstub')
>> +        g = gdb.GDBRemote('127.0.0.1', 1234, False, False)
>> +        g.connect()
>> +        r = g.cmd(b'qSupported')
>> +        if b'qXfer:features:read+' in r:
>> +            g.cmd(b'qXfer:features:read:target.xml:0,ffb')
>> +        if b'ReverseStep+' not in r:
>> +            self.fail('Reverse step is not supported by QEMU')
>> +        if b'ReverseContinue+' not in r:
>> +            self.fail('Reverse continue is not supported by QEMU')
>> +
>> +        logger.info('stepping forward')
>> +        steps = []
>> +        # record first instruction addresses
>> +        for _ in range(self.STEPS):
>> +            pc = self.get_pc(g)
>> +            logger.info('saving position %x' % pc)
>> +            steps.append(pc)
>> +            self.gdb_step(g)
> 
> Do you think it'd make sense to have more utility methods, such as
> `step()` and `bstep()` in `avocado.utils.gdb.GDBRemote` itself?

I thought about it, but it was easier to not have the dependency on 
newer avocado version.
But now we can move these functions into avocado in two steps.

> 
>> +
>> +        # visit the recorded instruction in reverse order
>> +        logger.info('stepping backward')
>> +        for addr in steps[::-1]:
>> +            self.gdb_bstep(g)
>> +            self.check_pc(g, addr)
>> +            logger.info('found position %x' % addr)
>> +
>> +        logger.info('seeking to the end (icount %s)' % (last_icount - 1))
>> +        vm.qmp('replay-break', icount=last_icount - 1)
>> +        # continue - will return after pausing
>> +        g.cmd(b'c', b'T02thread:01;')
>> +
>> +        logger.info('setting breakpoints')
>> +        for addr in steps:
>> +            # hardware breakpoint at addr with len=1
>> +            g.cmd(b'Z1,%x,1' % addr, b'OK')
>> +
>> +        logger.info('running reverse continue to reach %x' % steps[-1])
>> +        # reverse continue - will return after stopping at the breakpoint
>> +        g.cmd(b'bc', b'T05thread:01;')
>> +
>> +        # assume that none of the first instructions is executed again
>> +        # breaking the order of the breakpoints
>> +        self.check_pc(g, steps[-1])
>> +        logger.info('successfully reached %x' % steps[-1])
>> +
>> +        logger.info('exitting gdb and qemu')
>> +        vm.shutdown()
>> +
>> +class ReverseDebugging_X86_64(ReverseDebugging):
>> +    REG_PC = 0x10
>> +    REG_CS = 0x12
>> +    def get_pc(self, g):
>> +        return self.get_reg_le(g, self.REG_PC) \
>> +            + self.get_reg_le(g, self.REG_CS) * 0x10
>> +
> 
> Nitpick: avoid backslash as line continuation, and favor parentheses,
> that is:
> 
>      def get_pc(self, g):
>          return (self.get_reg_le(g, self.REG_PC) +
>                  self.get_reg_le(g, self.REG_CS) * 0x10)
> 
> Despite the comments and questions, this LGTM:
> 
> Reviewed-by: Cleber Rosa <crosa@redhat.com>
> Tested-by: Cleber Rosa <crosa@redhat.com>
>
Cleber Rosa Oct. 6, 2020, 6:16 p.m. UTC | #3
On Tue, Oct 06, 2020 at 06:09:55PM +0300, Pavel Dovgalyuk wrote:
> On 06.10.2020 16:36, Cleber Rosa wrote:
> > On Sat, Oct 03, 2020 at 08:14:06PM +0300, Pavel Dovgalyuk wrote:
> > > From: Pavel Dovgalyuk <Pavel.Dovgaluk@gmail.com>
> > > 
> > > This is a test for GDB reverse debugging commands: reverse step and reverse continue.
> > > Every test in this suite consists of two phases: record and replay.
> > > Recording saves the execution of some instructions and makes an initial
> > > VM snapshot to allow reverse execution.
> > > Replay saves the order of the first instructions and then checks that they
> > > are executed backwards in the correct order.
> > > After that the execution is replayed to the end, and reverse continue
> > > command is checked by setting several breakpoints, and asserting
> > > that the execution is stopped at the last of them.
> > > 
> > > Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> > > Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
> > > Reviewed-by: Willian Rampazzo <willianr@redhat.com>
> > > 
> > > --
> > > 
> > > v5:
> > >   - disabled (as some other tests) when running on gitlab
> > >     due to the unidentified timeout problem
> > > ---
> > >   MAINTAINERS                           |    1
> > >   tests/acceptance/reverse_debugging.py |  208 +++++++++++++++++++++++++++++++++
> > >   2 files changed, 209 insertions(+)
> > >   create mode 100644 tests/acceptance/reverse_debugging.py
> > > 
> > > diff --git a/MAINTAINERS b/MAINTAINERS
> > > index ea4fa3e481..bd3a7efb75 100644
> > > --- a/MAINTAINERS
> > > +++ b/MAINTAINERS
> > > @@ -2693,6 +2693,7 @@ F: include/sysemu/replay.h
> > >   F: docs/replay.txt
> > >   F: stubs/replay.c
> > >   F: tests/acceptance/replay_kernel.py
> > > +F: tests/acceptance/reverse_debugging.py
> > >   F: qapi/replay.json
> > >   IOVA Tree
> > > diff --git a/tests/acceptance/reverse_debugging.py b/tests/acceptance/reverse_debugging.py
> > > new file mode 100644
> > > index 0000000000..b72fdf6cdc
> > > --- /dev/null
> > > +++ b/tests/acceptance/reverse_debugging.py
> > > @@ -0,0 +1,208 @@
> > > +# Reverse debugging test
> > > +#
> > > +# Copyright (c) 2020 ISP RAS
> > > +#
> > > +# Author:
> > > +#  Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> > > +#
> > > +# This work is licensed under the terms of the GNU GPL, version 2 or
> > > +# later.  See the COPYING file in the top-level directory.
> > > +import os
> > > +import logging
> > > +
> > > +from avocado import skipIf
> > > +from avocado_qemu import BUILD_DIR
> > > +from avocado.utils import gdb
> > > +from avocado.utils import process
> > > +from avocado.utils.path import find_command
> > > +from boot_linux_console import LinuxKernelTest
> > > +
> > > +class ReverseDebugging(LinuxKernelTest):
> > > +    """
> > > +    Test GDB reverse debugging commands: reverse step and reverse continue.
> > > +    Recording saves the execution of some instructions and makes an initial
> > > +    VM snapshot to allow reverse execution.
> > > +    Replay saves the order of the first instructions and then checks that they
> > > +    are executed backwards in the correct order.
> > > +    After that the execution is replayed to the end, and reverse continue
> > > +    command is checked by setting several breakpoints, and asserting
> > > +    that the execution is stopped at the last of them.
> > > +    """
> > > +
> > > +    timeout = 10
> > > +    STEPS = 10
> > > +    endian_is_le = True
> > 
> > Have you attmepted a "be" test too?  I'm curious about why this is
> > defined (and used later) but it's never used as `False`.
> 
> It was intended to be used with PPC, but PPCs record-replay is not reliable
> enough.
>

OK, thanks for the explanation.

> > 
> > > +
> > > +    def run_vm(self, record, shift, args, replay_path, image_path):
> > > +        logger = logging.getLogger('replay')
> > > +        vm = self.get_vm()
> > > +        vm.set_console()
> > > +        if record:
> > > +            logger.info('recording the execution...')
> > > +            mode = 'record'
> > > +        else:
> > > +            logger.info('replaying the execution...')
> > > +            mode = 'replay'
> > > +            vm.add_args('-s', '-S')
> > > +        vm.add_args('-icount', 'shift=%s,rr=%s,rrfile=%s,rrsnapshot=init' %
> > > +                    (shift, mode, replay_path),
> > > +                    '-net', 'none')
> > > +        vm.add_args('-drive', 'file=%s,if=none' % image_path)
> > > +        if args:
> > > +            vm.add_args(*args)
> > > +        vm.launch()
> > > +        return vm
> > > +
> > > +    @staticmethod
> > > +    def get_reg_le(g, reg):
> > > +        res = g.cmd(b'p%x' % reg)
> > > +        num = 0
> > > +        for i in range(len(res))[-2::-2]:
> > > +            num = 0x100 * num + int(res[i:i + 2], 16)
> > > +        return num
> > > +
> > > +    @staticmethod
> > > +    def get_reg_be(g, reg):
> > > +        res = g.cmd(b'p%x' % reg)
> > > +        return int(res, 16)
> > > +
> > > +    def get_reg(self, g, reg):
> > > +        # value may be encoded in BE or LE order
> > > +        if self.endian_is_le:
> > > +            return self.get_reg_le(g, reg)
> > > +        else:
> > > +            return self.get_reg_be(g, reg)
> > > +
> > > +    def get_pc(self, g):
> > > +        return self.get_reg(g, self.REG_PC)
> > > +
> > > +    def check_pc(self, g, addr):
> > > +        pc = self.get_pc(g)
> > > +        if pc != addr:
> > > +            self.fail('Invalid PC (read %x instead of %x)' % (pc, addr))
> > > +
> > > +    @staticmethod
> > > +    def gdb_step(g):
> > > +        g.cmd(b's', b'T05thread:01;')
> > > +
> > > +    @staticmethod
> > > +    def gdb_bstep(g):
> > > +        g.cmd(b'bs', b'T05thread:01;')
> > > +
> > > +    @staticmethod
> > > +    def vm_get_icount(vm):
> > > +        return vm.qmp('query-replay')['return']['icount']
> > > +
> > > +    def reverse_debugging(self, shift=7, args=None):
> > > +        logger = logging.getLogger('replay')
> > > +
> > > +        # create qcow2 for snapshots
> > > +        logger.info('creating qcow2 image for VM snapshots')
> > > +        image_path = os.path.join(self.workdir, 'disk.qcow2')
> > > +        qemu_img = os.path.join(BUILD_DIR, 'qemu-img')
> > > +        if not os.path.exists(qemu_img):
> > > +            qemu_img = find_command('qemu-img', False)
> > > +        if qemu_img is False:
> > > +            self.cancel('Could not find "qemu-img", which is required to '
> > > +                        'create the temporary qcow2 image')
> > 
> > This snippet is clearly modeled after the snippet in
> > `boot_linux.BootLinuxBase.download_boot()`.  I'm adding an action
> > item to create a generic utility:
> > 
> >    https://gitlab.com/cleber.gnu/qemu/-/issues/1
> > 
> > > +        cmd = '%s create -f qcow2 %s 128M' % (qemu_img, image_path)
> > > +        process.run(cmd)
> > > +
> > > +        replay_path = os.path.join(self.workdir, 'replay.bin')
> > > +
> > > +        # record the log
> > > +        vm = self.run_vm(True, shift, args, replay_path, image_path)
> > > +        while self.vm_get_icount(vm) <= self.STEPS:
> > > +            pass
> > > +        last_icount = self.vm_get_icount(vm)
> > > +        vm.shutdown()
> > > +
> > > +        logger.info("recorded log with %s+ steps" % last_icount)
> > > +
> > > +        # replay and run debug commands
> > > +        vm = self.run_vm(False, shift, args, replay_path, image_path)
> > > +        logger.info('connecting to gdbstub')
> > > +        g = gdb.GDBRemote('127.0.0.1', 1234, False, False)
> > > +        g.connect()
> > > +        r = g.cmd(b'qSupported')
> > > +        if b'qXfer:features:read+' in r:
> > > +            g.cmd(b'qXfer:features:read:target.xml:0,ffb')
> > > +        if b'ReverseStep+' not in r:
> > > +            self.fail('Reverse step is not supported by QEMU')
> > > +        if b'ReverseContinue+' not in r:
> > > +            self.fail('Reverse continue is not supported by QEMU')
> > > +
> > > +        logger.info('stepping forward')
> > > +        steps = []
> > > +        # record first instruction addresses
> > > +        for _ in range(self.STEPS):
> > > +            pc = self.get_pc(g)
> > > +            logger.info('saving position %x' % pc)
> > > +            steps.append(pc)
> > > +            self.gdb_step(g)
> > 
> > Do you think it'd make sense to have more utility methods, such as
> > `step()` and `bstep()` in `avocado.utils.gdb.GDBRemote` itself?
> 
> I thought about it, but it was easier to not have the dependency on newer
> avocado version.

Agreed.

> But now we can move these functions into avocado in two steps.
>

OK.  I think the versions of these functions in
`avocado.utils.gdb.GDBRemote` can benefit from parsing the reply
packets.  With that, in addition to using a strict expected reponse
(like you've done with b'T05thread:01;') the caller may inspect
only the aspects that it deems important.

For instance, one may be interested in asserting that the signal
was a SIGTRAP, but may not care about the thread ID.

Anyway, I'm opening an issue on the Avocado project page:

   https://github.com/avocado-framework/avocado/issues/4258

If you have ideas about the interface, please let me know.

Thanks,
- Cleber.
Philippe Mathieu-Daudé Oct. 6, 2020, 7:55 p.m. UTC | #4
On 10/3/20 7:14 PM, Pavel Dovgalyuk wrote:
> From: Pavel Dovgalyuk <Pavel.Dovgaluk@gmail.com>
> 
> This is a test for GDB reverse debugging commands: reverse step and reverse continue.
> Every test in this suite consists of two phases: record and replay.
> Recording saves the execution of some instructions and makes an initial
> VM snapshot to allow reverse execution.
> Replay saves the order of the first instructions and then checks that they
> are executed backwards in the correct order.
> After that the execution is replayed to the end, and reverse continue
> command is checked by setting several breakpoints, and asserting
> that the execution is stopped at the last of them.
> 
> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
> Reviewed-by: Willian Rampazzo <willianr@redhat.com>
> 
> --
> 
> v5:
>  - disabled (as some other tests) when running on gitlab
>    due to the unidentified timeout problem
> ---
>  MAINTAINERS                           |    1 
>  tests/acceptance/reverse_debugging.py |  208 +++++++++++++++++++++++++++++++++
>  2 files changed, 209 insertions(+)
>  create mode 100644 tests/acceptance/reverse_debugging.py

Thanks, applied to my acceptance-testing tree.
Philippe Mathieu-Daudé Oct. 7, 2020, 5:42 a.m. UTC | #5
On 10/6/20 9:55 PM, Philippe Mathieu-Daudé wrote:
> On 10/3/20 7:14 PM, Pavel Dovgalyuk wrote:
>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@gmail.com>
>>
>> This is a test for GDB reverse debugging commands: reverse step and reverse continue.
>> Every test in this suite consists of two phases: record and replay.
>> Recording saves the execution of some instructions and makes an initial
>> VM snapshot to allow reverse execution.
>> Replay saves the order of the first instructions and then checks that they
>> are executed backwards in the correct order.
>> After that the execution is replayed to the end, and reverse continue
>> command is checked by setting several breakpoints, and asserting
>> that the execution is stopped at the last of them.
>>
>> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
>> Reviewed-by: Willian Rampazzo <willianr@redhat.com>
>>
>> --

This separator confuses the git tools. Can you stop using it
and use the '---' one instead?

>>
>> v5:
>>  - disabled (as some other tests) when running on gitlab
>>    due to the unidentified timeout problem

^ this should be stripped automatically but isn't.

>> ---
>>  MAINTAINERS                           |    1 
>>  tests/acceptance/reverse_debugging.py |  208 +++++++++++++++++++++++++++++++++
>>  2 files changed, 209 insertions(+)
>>  create mode 100644 tests/acceptance/reverse_debugging.py
> 
> Thanks, applied to my acceptance-testing tree.

This patch as already be merged by Paolo as commit be52eca309
("tests/acceptance: add reverse debugging test").

The notes below your '--' separator have been merged too.
Dirty, but not a big deal.

Regards,

Phil.
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index ea4fa3e481..bd3a7efb75 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2693,6 +2693,7 @@  F: include/sysemu/replay.h
 F: docs/replay.txt
 F: stubs/replay.c
 F: tests/acceptance/replay_kernel.py
+F: tests/acceptance/reverse_debugging.py
 F: qapi/replay.json
 
 IOVA Tree
diff --git a/tests/acceptance/reverse_debugging.py b/tests/acceptance/reverse_debugging.py
new file mode 100644
index 0000000000..b72fdf6cdc
--- /dev/null
+++ b/tests/acceptance/reverse_debugging.py
@@ -0,0 +1,208 @@ 
+# Reverse debugging test
+#
+# Copyright (c) 2020 ISP RAS
+#
+# Author:
+#  Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+import os
+import logging
+
+from avocado import skipIf
+from avocado_qemu import BUILD_DIR
+from avocado.utils import gdb
+from avocado.utils import process
+from avocado.utils.path import find_command
+from boot_linux_console import LinuxKernelTest
+
+class ReverseDebugging(LinuxKernelTest):
+    """
+    Test GDB reverse debugging commands: reverse step and reverse continue.
+    Recording saves the execution of some instructions and makes an initial
+    VM snapshot to allow reverse execution.
+    Replay saves the order of the first instructions and then checks that they
+    are executed backwards in the correct order.
+    After that the execution is replayed to the end, and reverse continue
+    command is checked by setting several breakpoints, and asserting
+    that the execution is stopped at the last of them.
+    """
+
+    timeout = 10
+    STEPS = 10
+    endian_is_le = True
+
+    def run_vm(self, record, shift, args, replay_path, image_path):
+        logger = logging.getLogger('replay')
+        vm = self.get_vm()
+        vm.set_console()
+        if record:
+            logger.info('recording the execution...')
+            mode = 'record'
+        else:
+            logger.info('replaying the execution...')
+            mode = 'replay'
+            vm.add_args('-s', '-S')
+        vm.add_args('-icount', 'shift=%s,rr=%s,rrfile=%s,rrsnapshot=init' %
+                    (shift, mode, replay_path),
+                    '-net', 'none')
+        vm.add_args('-drive', 'file=%s,if=none' % image_path)
+        if args:
+            vm.add_args(*args)
+        vm.launch()
+        return vm
+
+    @staticmethod
+    def get_reg_le(g, reg):
+        res = g.cmd(b'p%x' % reg)
+        num = 0
+        for i in range(len(res))[-2::-2]:
+            num = 0x100 * num + int(res[i:i + 2], 16)
+        return num
+
+    @staticmethod
+    def get_reg_be(g, reg):
+        res = g.cmd(b'p%x' % reg)
+        return int(res, 16)
+
+    def get_reg(self, g, reg):
+        # value may be encoded in BE or LE order
+        if self.endian_is_le:
+            return self.get_reg_le(g, reg)
+        else:
+            return self.get_reg_be(g, reg)
+
+    def get_pc(self, g):
+        return self.get_reg(g, self.REG_PC)
+
+    def check_pc(self, g, addr):
+        pc = self.get_pc(g)
+        if pc != addr:
+            self.fail('Invalid PC (read %x instead of %x)' % (pc, addr))
+
+    @staticmethod
+    def gdb_step(g):
+        g.cmd(b's', b'T05thread:01;')
+
+    @staticmethod
+    def gdb_bstep(g):
+        g.cmd(b'bs', b'T05thread:01;')
+
+    @staticmethod
+    def vm_get_icount(vm):
+        return vm.qmp('query-replay')['return']['icount']
+
+    def reverse_debugging(self, shift=7, args=None):
+        logger = logging.getLogger('replay')
+
+        # create qcow2 for snapshots
+        logger.info('creating qcow2 image for VM snapshots')
+        image_path = os.path.join(self.workdir, 'disk.qcow2')
+        qemu_img = os.path.join(BUILD_DIR, 'qemu-img')
+        if not os.path.exists(qemu_img):
+            qemu_img = find_command('qemu-img', False)
+        if qemu_img is False:
+            self.cancel('Could not find "qemu-img", which is required to '
+                        'create the temporary qcow2 image')
+        cmd = '%s create -f qcow2 %s 128M' % (qemu_img, image_path)
+        process.run(cmd)
+
+        replay_path = os.path.join(self.workdir, 'replay.bin')
+
+        # record the log
+        vm = self.run_vm(True, shift, args, replay_path, image_path)
+        while self.vm_get_icount(vm) <= self.STEPS:
+            pass
+        last_icount = self.vm_get_icount(vm)
+        vm.shutdown()
+
+        logger.info("recorded log with %s+ steps" % last_icount)
+
+        # replay and run debug commands
+        vm = self.run_vm(False, shift, args, replay_path, image_path)
+        logger.info('connecting to gdbstub')
+        g = gdb.GDBRemote('127.0.0.1', 1234, False, False)
+        g.connect()
+        r = g.cmd(b'qSupported')
+        if b'qXfer:features:read+' in r:
+            g.cmd(b'qXfer:features:read:target.xml:0,ffb')
+        if b'ReverseStep+' not in r:
+            self.fail('Reverse step is not supported by QEMU')
+        if b'ReverseContinue+' not in r:
+            self.fail('Reverse continue is not supported by QEMU')
+
+        logger.info('stepping forward')
+        steps = []
+        # record first instruction addresses
+        for _ in range(self.STEPS):
+            pc = self.get_pc(g)
+            logger.info('saving position %x' % pc)
+            steps.append(pc)
+            self.gdb_step(g)
+
+        # visit the recorded instruction in reverse order
+        logger.info('stepping backward')
+        for addr in steps[::-1]:
+            self.gdb_bstep(g)
+            self.check_pc(g, addr)
+            logger.info('found position %x' % addr)
+
+        logger.info('seeking to the end (icount %s)' % (last_icount - 1))
+        vm.qmp('replay-break', icount=last_icount - 1)
+        # continue - will return after pausing
+        g.cmd(b'c', b'T02thread:01;')
+
+        logger.info('setting breakpoints')
+        for addr in steps:
+            # hardware breakpoint at addr with len=1
+            g.cmd(b'Z1,%x,1' % addr, b'OK')
+
+        logger.info('running reverse continue to reach %x' % steps[-1])
+        # reverse continue - will return after stopping at the breakpoint
+        g.cmd(b'bc', b'T05thread:01;')
+
+        # assume that none of the first instructions is executed again
+        # breaking the order of the breakpoints
+        self.check_pc(g, steps[-1])
+        logger.info('successfully reached %x' % steps[-1])
+
+        logger.info('exitting gdb and qemu')
+        vm.shutdown()
+
+class ReverseDebugging_X86_64(ReverseDebugging):
+    REG_PC = 0x10
+    REG_CS = 0x12
+    def get_pc(self, g):
+        return self.get_reg_le(g, self.REG_PC) \
+            + self.get_reg_le(g, self.REG_CS) * 0x10
+
+    # unidentified gitlab timeout problem
+    @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
+    def test_x86_64_pc(self):
+        """
+        :avocado: tags=arch:x86_64
+        :avocado: tags=machine:pc
+        """
+        # start with BIOS only
+        self.reverse_debugging()
+
+class ReverseDebugging_AArch64(ReverseDebugging):
+    REG_PC = 32
+
+    # unidentified gitlab timeout problem
+    @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
+    def test_aarch64_virt(self):
+        """
+        :avocado: tags=arch:aarch64
+        :avocado: tags=machine:virt
+        :avocado: tags=cpu:cortex-a53
+        """
+        kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
+                      '/linux/releases/29/Everything/aarch64/os/images/pxeboot'
+                      '/vmlinuz')
+        kernel_hash = '8c73e469fc6ea06a58dc83a628fc695b693b8493'
+        kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
+
+        self.reverse_debugging(
+            args=('-kernel', kernel_path, '-cpu', 'cortex-a53'))