diff mbox series

[2/2] iotests/block-status-cache: New test

Message ID 20220117162649.193501-3-hreitz@redhat.com
State New
Headers show
Series block/io: Update BSC only if want_zero is true | expand

Commit Message

Hanna Czenczek Jan. 17, 2022, 4:26 p.m. UTC
Add a new test to verify that want_zero=false block-status calls do not
pollute the block-status cache for want_zero=true calls.

We check want_zero=true calls and their results using `qemu-img map`
(over NBD), and want_zero=false calls also using `qemu-img map` over
NBD, but using the qemu:allocation-depth context.

(This test case cannot be integrated into nbd-qemu-allocation, because
that is a qcow2 test, and this is a raw test.)

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 tests/qemu-iotests/tests/block-status-cache   | 130 ++++++++++++++++++
 .../qemu-iotests/tests/block-status-cache.out |   5 +
 2 files changed, 135 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/block-status-cache
 create mode 100644 tests/qemu-iotests/tests/block-status-cache.out

Comments

Nir Soffer Jan. 17, 2022, 5:57 p.m. UTC | #1
On Mon, Jan 17, 2022 at 6:26 PM Hanna Reitz <hreitz@redhat.com> wrote:
>
> Add a new test to verify that want_zero=false block-status calls do not
> pollute the block-status cache for want_zero=true calls.
>
> We check want_zero=true calls and their results using `qemu-img map`
> (over NBD), and want_zero=false calls also using `qemu-img map` over
> NBD, but using the qemu:allocation-depth context.
>
> (This test case cannot be integrated into nbd-qemu-allocation, because
> that is a qcow2 test, and this is a raw test.)
>
> Signed-off-by: Hanna Reitz <hreitz@redhat.com>
> ---
>  tests/qemu-iotests/tests/block-status-cache   | 130 ++++++++++++++++++
>  .../qemu-iotests/tests/block-status-cache.out |   5 +
>  2 files changed, 135 insertions(+)
>  create mode 100755 tests/qemu-iotests/tests/block-status-cache
>  create mode 100644 tests/qemu-iotests/tests/block-status-cache.out
>
> diff --git a/tests/qemu-iotests/tests/block-status-cache b/tests/qemu-iotests/tests/block-status-cache
> new file mode 100755
> index 0000000000..1f2c3109d3
> --- /dev/null
> +++ b/tests/qemu-iotests/tests/block-status-cache
> @@ -0,0 +1,130 @@
> +#!/usr/bin/env python3
> +# group: rw quick
> +#
> +# Test cases for the block-status cache.
> +#
> +# Copyright (C) 2022 Red Hat, Inc.
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
> +#
> +
> +import os
> +import signal
> +import iotests
> +from iotests import qemu_img_create, qemu_img_pipe, qemu_nbd
> +
> +
> +image_size = 1 * 1024 * 1024
> +test_img = os.path.join(iotests.test_dir, 'test.img')
> +
> +nbd_pidfile = os.path.join(iotests.test_dir, 'nbd.pid')
> +nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
> +
> +
> +class TestBscWithNbd(iotests.QMPTestCase):
> +    def setUp(self) -> None:
> +        """Just create an empty image with a read-only NBD server on it"""
> +        assert qemu_img_create('-f', iotests.imgfmt, test_img,
> +                               str(image_size)) == 0
> +
> +        assert qemu_nbd('-k', nbd_sock, '-f', iotests.imgfmt, '-t', '-A', '-r',
> +                        f'--pid-file={nbd_pidfile}', test_img) == 0

This is a good place to comment that -A (or better --allocation-depth)
is required for this test.

> +
> +    def tearDown(self) -> None:
> +        with open(nbd_pidfile, encoding='utf-8') as f:
> +            pid = int(f.read())
> +        os.kill(pid, signal.SIGTERM)
> +        os.remove(nbd_pidfile)
> +        os.remove(test_img)
> +
> +    def test_with_zero_bug(self) -> None:
> +        """
> +        Verify that the block-status cache is not corrupted by a
> +        want_zero=false call.
> +        We can provoke a want_zero=false call with `qemu-img map` over NBD with
> +        x-dirty-bitmap=qemu:allocation-depth, so we first run a normal `map`

x-dirty-bitmap=qemu:allocation-depth looks a bit dirty to me but I guess
we don't have a better way without depending on another nbd client.

If depending on libnbd is ok, this test can be much simpler:

$ nbdinfo --map=base:allocation nbd+unix:///?socket=/tmp/nbd.sock
         0        4096    0  data
      4096  1073737728    3  hole,zero
$ nbdinfo --map=qemu:allocation-depth nbd+unix:///?socket=/tmp/nbd.sock
         0  1073741824    1  local
$ nbdinfo --map=qemu:allocation-depth nbd+unix:///?socket=/tmp/nbd.sock
         0  1073741824    1  local
$ nbdinfo --map=base:allocation nbd+unix:///?socket=/tmp/nbd.sock
         0  1073741824    0  data

> +        (which results in want_zero=true), then using said
> +        qemu:allocation-depth context, and finally another normal `map` to
> +        verify that the cache has not been corrupted.
> +        """
> +
> +        nbd_img_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}'
> +        nbd_img_opts_alloc_depth = nbd_img_opts + \
> +            ',x-dirty-bitmap=qemu:allocation-depth'
> +
> +        # Normal map, results in want_zero=true.
> +        # This will probably detect an allocated data sector first (qemu likes
> +        # to allocate the first sector to facilitate alignment probing), and
> +        # then the rest to be zero.  The BSC will thus contain (if anything)
> +        # one range covering the first sector.
> +        map_pre = qemu_img_pipe('map', '--output=json', '--image-opts',
> +                                nbd_img_opts)
> +
> +        # qemu:allocation-depth maps for want_zero=false.
> +        # want_zero=false should (with the file driver, which the server is
> +        # using) report everything as data.  While this is sufficient for
> +        # want_zero=false, this is nothing that should end up in the
> +        # block-status cache.
> +        # Due to a bug, this information did end up in the cache, though, and
> +        # this would lead to wrong information being returned on subsequent
> +        # want_zero=true calls.
> +        #
> +        # We need to run this map twice: On the first call, we probably still
> +        # have the first sector in the cache, and so this will be served from
> +        # the cache; and only the subsequent range will be queried from the
> +        # block driver.  This subsequent range will then be entered into the
> +        # cache.
> +        # If we did a want_zero=true call at this point, we would thus get
> +        # correct information: The first sector is not covered by the cache, so
> +        # we would get fresh block-status information from the driver, which
> +        # would return a data range, and this would then go into the cache,
> +        # evicting the wrong range from the want_zero=false call before.
> +        #
> +        # Therefore, we need a second want_zero=false map to reproduce:
> +        # Since the first sector is not in the cache, the query for its status
> +        # will go to the driver, which will return a result that reports the
> +        # whole image to be a single data area.  This result will then go into
> +        # the cache, and so the cache will then report the whole image to
> +        # contain data.

Interesting, but once we fix the bug this complex flow is gone so
we can eliminate this text, no?

> +        #
> +        # Note that once the cache reports the whole image to contain data, any
> +        # subsequent map operation will be served from the cache, and so we can
> +        # never loop too many times here.
> +        for _ in range(2):
> +            # (Ignore the result, this is just to contaminate the cache)
> +            qemu_img_pipe('map', '--output=json', '--image-opts',
> +                          nbd_img_opts_alloc_depth)
> +
> +        # Now let's see whether the cache reports everything as data, or
> +        # whether we get correct information (i.e. the same as we got on our
> +        # first attempt).
> +        map_post = qemu_img_pipe('map', '--output=json', '--image-opts',
> +                                 nbd_img_opts)
> +
> +        if map_pre != map_post:
> +            print('ERROR: Map information differs before and after querying ' +
> +                  'qemu:allocation-depth')
> +            print('Before:')
> +            print(map_pre)
> +            print('After:')
> +            print(map_post)
> +
> +            self.fail("Map information differs")
> +
> +
> +if __name__ == '__main__':
> +    # The block-status cache only works on the protocol layer, so to test it,
> +    # we can only use the raw format
> +    iotests.main(supported_fmts=['raw'],
> +                 supported_protocols=['file'])
> diff --git a/tests/qemu-iotests/tests/block-status-cache.out b/tests/qemu-iotests/tests/block-status-cache.out
> new file mode 100644
> index 0000000000..ae1213e6f8
> --- /dev/null
> +++ b/tests/qemu-iotests/tests/block-status-cache.out
> @@ -0,0 +1,5 @@
> +.
> +----------------------------------------------------------------------
> +Ran 1 tests
> +
> +OK
> --
> 2.33.1
>

Reviewed-by: Nir Soffer <nsoffer@redhat.com>
Hanna Czenczek Jan. 18, 2022, 4:41 p.m. UTC | #2
On 17.01.22 18:57, Nir Soffer wrote:
> On Mon, Jan 17, 2022 at 6:26 PM Hanna Reitz <hreitz@redhat.com> wrote:
>> Add a new test to verify that want_zero=false block-status calls do not
>> pollute the block-status cache for want_zero=true calls.
>>
>> We check want_zero=true calls and their results using `qemu-img map`
>> (over NBD), and want_zero=false calls also using `qemu-img map` over
>> NBD, but using the qemu:allocation-depth context.
>>
>> (This test case cannot be integrated into nbd-qemu-allocation, because
>> that is a qcow2 test, and this is a raw test.)
>>
>> Signed-off-by: Hanna Reitz <hreitz@redhat.com>
>> ---
>>   tests/qemu-iotests/tests/block-status-cache   | 130 ++++++++++++++++++
>>   .../qemu-iotests/tests/block-status-cache.out |   5 +
>>   2 files changed, 135 insertions(+)
>>   create mode 100755 tests/qemu-iotests/tests/block-status-cache
>>   create mode 100644 tests/qemu-iotests/tests/block-status-cache.out
>>
>> diff --git a/tests/qemu-iotests/tests/block-status-cache b/tests/qemu-iotests/tests/block-status-cache
>> new file mode 100755
>> index 0000000000..1f2c3109d3
>> --- /dev/null
>> +++ b/tests/qemu-iotests/tests/block-status-cache
>> @@ -0,0 +1,130 @@
>> +#!/usr/bin/env python3
>> +# group: rw quick
>> +#
>> +# Test cases for the block-status cache.
>> +#
>> +# Copyright (C) 2022 Red Hat, Inc.
>> +#
>> +# This program is free software; you can redistribute it and/or modify
>> +# it under the terms of the GNU General Public License as published by
>> +# the Free Software Foundation; either version 2 of the License, or
>> +# (at your option) any later version.
>> +#
>> +# This program is distributed in the hope that it will be useful,
>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> +# GNU General Public License for more details.
>> +#
>> +# You should have received a copy of the GNU General Public License
>> +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
>> +#
>> +
>> +import os
>> +import signal
>> +import iotests
>> +from iotests import qemu_img_create, qemu_img_pipe, qemu_nbd
>> +
>> +
>> +image_size = 1 * 1024 * 1024
>> +test_img = os.path.join(iotests.test_dir, 'test.img')
>> +
>> +nbd_pidfile = os.path.join(iotests.test_dir, 'nbd.pid')
>> +nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
>> +
>> +
>> +class TestBscWithNbd(iotests.QMPTestCase):
>> +    def setUp(self) -> None:
>> +        """Just create an empty image with a read-only NBD server on it"""
>> +        assert qemu_img_create('-f', iotests.imgfmt, test_img,
>> +                               str(image_size)) == 0
>> +
>> +        assert qemu_nbd('-k', nbd_sock, '-f', iotests.imgfmt, '-t', '-A', '-r',
>> +                        f'--pid-file={nbd_pidfile}', test_img) == 0
> This is a good place to comment that -A (or better --allocation-depth)
> is required for this test.

Sure, why not.

>> +
>> +    def tearDown(self) -> None:
>> +        with open(nbd_pidfile, encoding='utf-8') as f:
>> +            pid = int(f.read())
>> +        os.kill(pid, signal.SIGTERM)
>> +        os.remove(nbd_pidfile)
>> +        os.remove(test_img)
>> +
>> +    def test_with_zero_bug(self) -> None:
>> +        """
>> +        Verify that the block-status cache is not corrupted by a
>> +        want_zero=false call.
>> +        We can provoke a want_zero=false call with `qemu-img map` over NBD with
>> +        x-dirty-bitmap=qemu:allocation-depth, so we first run a normal `map`
> x-dirty-bitmap=qemu:allocation-depth looks a bit dirty to me but I guess
> we don't have a better way without depending on another nbd client.

Should be just fine for an iotest.  If the parameter is ever reworked, 
we can easily adjust the test.

> If depending on libnbd is ok, this test can be much simpler:
>
> $ nbdinfo --map=base:allocation nbd+unix:///?socket=/tmp/nbd.sock
>           0        4096    0  data
>        4096  1073737728    3  hole,zero
> $ nbdinfo --map=qemu:allocation-depth nbd+unix:///?socket=/tmp/nbd.sock
>           0  1073741824    1  local
> $ nbdinfo --map=qemu:allocation-depth nbd+unix:///?socket=/tmp/nbd.sock
>           0  1073741824    1  local
> $ nbdinfo --map=base:allocation nbd+unix:///?socket=/tmp/nbd.sock
>           0  1073741824    0  data
>
>> +        (which results in want_zero=true), then using said
>> +        qemu:allocation-depth context, and finally another normal `map` to
>> +        verify that the cache has not been corrupted.
>> +        """
>> +
>> +        nbd_img_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}'
>> +        nbd_img_opts_alloc_depth = nbd_img_opts + \
>> +            ',x-dirty-bitmap=qemu:allocation-depth'
>> +
>> +        # Normal map, results in want_zero=true.
>> +        # This will probably detect an allocated data sector first (qemu likes
>> +        # to allocate the first sector to facilitate alignment probing), and
>> +        # then the rest to be zero.  The BSC will thus contain (if anything)
>> +        # one range covering the first sector.
>> +        map_pre = qemu_img_pipe('map', '--output=json', '--image-opts',
>> +                                nbd_img_opts)
>> +
>> +        # qemu:allocation-depth maps for want_zero=false.
>> +        # want_zero=false should (with the file driver, which the server is
>> +        # using) report everything as data.  While this is sufficient for
>> +        # want_zero=false, this is nothing that should end up in the
>> +        # block-status cache.
>> +        # Due to a bug, this information did end up in the cache, though, and
>> +        # this would lead to wrong information being returned on subsequent
>> +        # want_zero=true calls.
>> +        #
>> +        # We need to run this map twice: On the first call, we probably still
>> +        # have the first sector in the cache, and so this will be served from
>> +        # the cache; and only the subsequent range will be queried from the
>> +        # block driver.  This subsequent range will then be entered into the
>> +        # cache.
>> +        # If we did a want_zero=true call at this point, we would thus get
>> +        # correct information: The first sector is not covered by the cache, so
>> +        # we would get fresh block-status information from the driver, which
>> +        # would return a data range, and this would then go into the cache,
>> +        # evicting the wrong range from the want_zero=false call before.
>> +        #
>> +        # Therefore, we need a second want_zero=false map to reproduce:
>> +        # Since the first sector is not in the cache, the query for its status
>> +        # will go to the driver, which will return a result that reports the
>> +        # whole image to be a single data area.  This result will then go into
>> +        # the cache, and so the cache will then report the whole image to
>> +        # contain data.
> Interesting, but once we fix the bug this complex flow is gone so
> we can eliminate this text, no?

Well...  Once the bug is fixed, we technically don’t need this test at 
all, right? O:)

This is a regression test to show that the behavior before the fix is 
broken, so it needs to reproduce the bug that existed before the fix.  
Therefore, it should include a detailed description how the reproducer 
works, i.e. in what way qemu’s behavior was broken before the fix.

(Otherwise it’d be unclear why we need to have this `range(2)` loop.)

>> +        #
>> +        # Note that once the cache reports the whole image to contain data, any
>> +        # subsequent map operation will be served from the cache, and so we can
>> +        # never loop too many times here.
>> +        for _ in range(2):
>> +            # (Ignore the result, this is just to contaminate the cache)
>> +            qemu_img_pipe('map', '--output=json', '--image-opts',
>> +                          nbd_img_opts_alloc_depth)
>> +
>> +        # Now let's see whether the cache reports everything as data, or
>> +        # whether we get correct information (i.e. the same as we got on our
>> +        # first attempt).
>> +        map_post = qemu_img_pipe('map', '--output=json', '--image-opts',
>> +                                 nbd_img_opts)
>> +
>> +        if map_pre != map_post:
>> +            print('ERROR: Map information differs before and after querying ' +
>> +                  'qemu:allocation-depth')
>> +            print('Before:')
>> +            print(map_pre)
>> +            print('After:')
>> +            print(map_post)
>> +
>> +            self.fail("Map information differs")
>> +
>> +
>> +if __name__ == '__main__':
>> +    # The block-status cache only works on the protocol layer, so to test it,
>> +    # we can only use the raw format
>> +    iotests.main(supported_fmts=['raw'],
>> +                 supported_protocols=['file'])
>> diff --git a/tests/qemu-iotests/tests/block-status-cache.out b/tests/qemu-iotests/tests/block-status-cache.out
>> new file mode 100644
>> index 0000000000..ae1213e6f8
>> --- /dev/null
>> +++ b/tests/qemu-iotests/tests/block-status-cache.out
>> @@ -0,0 +1,5 @@
>> +.
>> +----------------------------------------------------------------------
>> +Ran 1 tests
>> +
>> +OK
>> --
>> 2.33.1
>>
> Reviewed-by: Nir Soffer <nsoffer@redhat.com>

Thanks, and also thanks for reporting and testing!

(I’ll send a v2 with a comment added for the qemu-nbd invocation.)

Hanna
diff mbox series

Patch

diff --git a/tests/qemu-iotests/tests/block-status-cache b/tests/qemu-iotests/tests/block-status-cache
new file mode 100755
index 0000000000..1f2c3109d3
--- /dev/null
+++ b/tests/qemu-iotests/tests/block-status-cache
@@ -0,0 +1,130 @@ 
+#!/usr/bin/env python3
+# group: rw quick
+#
+# Test cases for the block-status cache.
+#
+# Copyright (C) 2022 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import signal
+import iotests
+from iotests import qemu_img_create, qemu_img_pipe, qemu_nbd
+
+
+image_size = 1 * 1024 * 1024
+test_img = os.path.join(iotests.test_dir, 'test.img')
+
+nbd_pidfile = os.path.join(iotests.test_dir, 'nbd.pid')
+nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
+
+
+class TestBscWithNbd(iotests.QMPTestCase):
+    def setUp(self) -> None:
+        """Just create an empty image with a read-only NBD server on it"""
+        assert qemu_img_create('-f', iotests.imgfmt, test_img,
+                               str(image_size)) == 0
+
+        assert qemu_nbd('-k', nbd_sock, '-f', iotests.imgfmt, '-t', '-A', '-r',
+                        f'--pid-file={nbd_pidfile}', test_img) == 0
+
+    def tearDown(self) -> None:
+        with open(nbd_pidfile, encoding='utf-8') as f:
+            pid = int(f.read())
+        os.kill(pid, signal.SIGTERM)
+        os.remove(nbd_pidfile)
+        os.remove(test_img)
+
+    def test_with_zero_bug(self) -> None:
+        """
+        Verify that the block-status cache is not corrupted by a
+        want_zero=false call.
+        We can provoke a want_zero=false call with `qemu-img map` over NBD with
+        x-dirty-bitmap=qemu:allocation-depth, so we first run a normal `map`
+        (which results in want_zero=true), then using said
+        qemu:allocation-depth context, and finally another normal `map` to
+        verify that the cache has not been corrupted.
+        """
+
+        nbd_img_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}'
+        nbd_img_opts_alloc_depth = nbd_img_opts + \
+            ',x-dirty-bitmap=qemu:allocation-depth'
+
+        # Normal map, results in want_zero=true.
+        # This will probably detect an allocated data sector first (qemu likes
+        # to allocate the first sector to facilitate alignment probing), and
+        # then the rest to be zero.  The BSC will thus contain (if anything)
+        # one range covering the first sector.
+        map_pre = qemu_img_pipe('map', '--output=json', '--image-opts',
+                                nbd_img_opts)
+
+        # qemu:allocation-depth maps for want_zero=false.
+        # want_zero=false should (with the file driver, which the server is
+        # using) report everything as data.  While this is sufficient for
+        # want_zero=false, this is nothing that should end up in the
+        # block-status cache.
+        # Due to a bug, this information did end up in the cache, though, and
+        # this would lead to wrong information being returned on subsequent
+        # want_zero=true calls.
+        #
+        # We need to run this map twice: On the first call, we probably still
+        # have the first sector in the cache, and so this will be served from
+        # the cache; and only the subsequent range will be queried from the
+        # block driver.  This subsequent range will then be entered into the
+        # cache.
+        # If we did a want_zero=true call at this point, we would thus get
+        # correct information: The first sector is not covered by the cache, so
+        # we would get fresh block-status information from the driver, which
+        # would return a data range, and this would then go into the cache,
+        # evicting the wrong range from the want_zero=false call before.
+        #
+        # Therefore, we need a second want_zero=false map to reproduce:
+        # Since the first sector is not in the cache, the query for its status
+        # will go to the driver, which will return a result that reports the
+        # whole image to be a single data area.  This result will then go into
+        # the cache, and so the cache will then report the whole image to
+        # contain data.
+        #
+        # Note that once the cache reports the whole image to contain data, any
+        # subsequent map operation will be served from the cache, and so we can
+        # never loop too many times here.
+        for _ in range(2):
+            # (Ignore the result, this is just to contaminate the cache)
+            qemu_img_pipe('map', '--output=json', '--image-opts',
+                          nbd_img_opts_alloc_depth)
+
+        # Now let's see whether the cache reports everything as data, or
+        # whether we get correct information (i.e. the same as we got on our
+        # first attempt).
+        map_post = qemu_img_pipe('map', '--output=json', '--image-opts',
+                                 nbd_img_opts)
+
+        if map_pre != map_post:
+            print('ERROR: Map information differs before and after querying ' +
+                  'qemu:allocation-depth')
+            print('Before:')
+            print(map_pre)
+            print('After:')
+            print(map_post)
+
+            self.fail("Map information differs")
+
+
+if __name__ == '__main__':
+    # The block-status cache only works on the protocol layer, so to test it,
+    # we can only use the raw format
+    iotests.main(supported_fmts=['raw'],
+                 supported_protocols=['file'])
diff --git a/tests/qemu-iotests/tests/block-status-cache.out b/tests/qemu-iotests/tests/block-status-cache.out
new file mode 100644
index 0000000000..ae1213e6f8
--- /dev/null
+++ b/tests/qemu-iotests/tests/block-status-cache.out
@@ -0,0 +1,5 @@ 
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK