diff mbox

qemu-iotests: fix 030 for faster machines

Message ID 1381804911-3664-1-git-send-email-famz@redhat.com
State New
Headers show

Commit Message

Fam Zheng Oct. 15, 2013, 2:41 a.m. UTC
If the block job completes too fast, the test can fail. Change the
numbers so the qmp events are more stably captured by the script.

A sleep is removed for the same reason.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 tests/qemu-iotests/030 | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

Comments

Max Reitz Oct. 16, 2013, 6:45 p.m. UTC | #1
On 2013-10-15 04:41, Fam Zheng wrote:
> If the block job completes too fast, the test can fail. Change the
> numbers so the qmp events are more stably captured by the script.
>
> A sleep is removed for the same reason.
>
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>   tests/qemu-iotests/030 | 11 +++++------
>   1 file changed, 5 insertions(+), 6 deletions(-)
>
> diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
> index ae56f3b..188b182 100755
> --- a/tests/qemu-iotests/030
> +++ b/tests/qemu-iotests/030
> @@ -403,14 +403,13 @@ class TestStreamStop(iotests.QMPTestCase):
>           result = self.vm.qmp('block-stream', device='drive0')
>           self.assert_qmp(result, 'return', {})
>   
> -        time.sleep(0.1)

Hm, I'm not sure whether removing the sleep actually removes the 
underlying race condition… It should work in most cases and the 
foreseeable future, though.

>           events = self.vm.get_qmp_events(wait=False)
>           self.assertEqual(events, [], 'unexpected QMP event: %s' % events)
>   
>           self.cancel_and_wait()
>   
>   class TestSetSpeed(iotests.QMPTestCase):
> -    image_len = 80 * 1024 * 1024 # MB
> +    image_len = 512 * 1024 * 1024 # MB
>   
>       def setUp(self):
>           qemu_img('create', backing_img, str(TestSetSpeed.image_len))
> @@ -457,23 +456,23 @@ class TestSetSpeed(iotests.QMPTestCase):
>           self.assert_qmp(result, 'return[0]/device', 'drive0')
>           self.assert_qmp(result, 'return[0]/speed', 0)
>   
> -        result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 1024 * 1024)
> +        result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 1024)

So the limit was already 8 MB/s? Doesn't this mean that the job should 
have taken 10 seconds anyway? Sounds to me like the block job speed is 
basically disregarded anyway.

If I re-add the sleep you removed in this patch, this test fails again 
for me. This further suggests block-job-set-speed to be kind of a no-op 
and the changes concerning the image length and the block job speed not 
really contributing to fixing the test.

So I think removing the sleep is all that would have to be done right 
now. OTOH, this is not really a permanent fix, either (the fundamental 
race condition remains). Furthermore, I guess there is some reason for 
having a sleep there - else it would not exist in the first place (and 
it apparently already caused problems some time ago which were "fixed" 
by replacing the previous "sleep(1)" by "sleep(0.1)").

All in all, if someone can assure me that the sleep in question is
unnecessary, I think removing it is all that's needed.

Max

>           self.assert_qmp(result, 'return', {})
>   
>           # Ensure the speed we set was accepted
>           result = self.vm.qmp('query-block-jobs')
>           self.assert_qmp(result, 'return[0]/device', 'drive0')
> -        self.assert_qmp(result, 'return[0]/speed', 8 * 1024 * 1024)
> +        self.assert_qmp(result, 'return[0]/speed', 8 * 1024)
>   
>           self.cancel_and_wait()
>   
>           # Check setting speed in block-stream works
> -        result = self.vm.qmp('block-stream', device='drive0', speed=4 * 1024 * 1024)
> +        result = self.vm.qmp('block-stream', device='drive0', speed=4 * 1024)
>           self.assert_qmp(result, 'return', {})
>   
>           result = self.vm.qmp('query-block-jobs')
>           self.assert_qmp(result, 'return[0]/device', 'drive0')
> -        self.assert_qmp(result, 'return[0]/speed', 4 * 1024 * 1024)
> +        self.assert_qmp(result, 'return[0]/speed', 4 * 1024)
>   
>           self.cancel_and_wait()
>
Fam Zheng Oct. 17, 2013, 6:28 a.m. UTC | #2
On Wed, 10/16 20:45, Max Reitz wrote:
> On 2013-10-15 04:41, Fam Zheng wrote:
> >If the block job completes too fast, the test can fail. Change the
> >numbers so the qmp events are more stably captured by the script.
> >
> >A sleep is removed for the same reason.
> >
> >Signed-off-by: Fam Zheng <famz@redhat.com>
> >---
> >  tests/qemu-iotests/030 | 11 +++++------
> >  1 file changed, 5 insertions(+), 6 deletions(-)
> >
> >diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
> >index ae56f3b..188b182 100755
> >--- a/tests/qemu-iotests/030
> >+++ b/tests/qemu-iotests/030
> >@@ -403,14 +403,13 @@ class TestStreamStop(iotests.QMPTestCase):
> >          result = self.vm.qmp('block-stream', device='drive0')
> >          self.assert_qmp(result, 'return', {})
> >-        time.sleep(0.1)
> 
> Hm, I'm not sure whether removing the sleep actually removes the
> underlying race condition… It should work in most cases and the
> foreseeable future, though.
> 
> >          events = self.vm.get_qmp_events(wait=False)
> >          self.assertEqual(events, [], 'unexpected QMP event: %s' % events)
> >          self.cancel_and_wait()
> >  class TestSetSpeed(iotests.QMPTestCase):
> >-    image_len = 80 * 1024 * 1024 # MB
> >+    image_len = 512 * 1024 * 1024 # MB
> >      def setUp(self):
> >          qemu_img('create', backing_img, str(TestSetSpeed.image_len))
> >@@ -457,23 +456,23 @@ class TestSetSpeed(iotests.QMPTestCase):
> >          self.assert_qmp(result, 'return[0]/device', 'drive0')
> >          self.assert_qmp(result, 'return[0]/speed', 0)
> >-        result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 1024 * 1024)
> >+        result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 1024)
> 
> So the limit was already 8 MB/s? Doesn't this mean that the job
> should have taken 10 seconds anyway? Sounds to me like the block job
> speed is basically disregarded anyway.

No, see below...

> 
> If I re-add the sleep you removed in this patch, this test fails
> again for me. This further suggests block-job-set-speed to be kind
> of a no-op and the changes concerning the image length and the block
> job speed not really contributing to fixing the test.
> 
> So I think removing the sleep is all that would have to be done
> right now. OTOH, this is not really a permanent fix, either (the
> fundamental race condition remains). Furthermore, I guess there is
> some reason for having a sleep there - else it would not exist in
> the first place (and it apparently already caused problems some time
> ago which were "fixed" by replacing the previous "sleep(1)" by
> "sleep(0.1)").
> 
> All in all, if someone can assure me that the sleep in question is
> unnecessary, I think removing it is all that's needed.
> 
> Max
> 

Both failure cases are simply that setting the speed or checking the status
comes too late: the streaming finishes, or comes close to finishing, in
negligible time once the job is started. In other words, dropping the speed
change and only increasing image_len and removing the sleep fixes it for me too.

Fam
Stefan Hajnoczi Oct. 17, 2013, 12:38 p.m. UTC | #3
On Tue, Oct 15, 2013 at 10:41:51AM +0800, Fam Zheng wrote:
> If the block job completes too fast, the test can fail. Change the
> numbers so the qmp events are more stably captured by the script.
> 
> A sleep is removed for the same reason.
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  tests/qemu-iotests/030 | 11 +++++------
>  1 file changed, 5 insertions(+), 6 deletions(-)

Can you try using the blkdebug suspend command to pause I/O?  That way
the test can be made reliable.

Stefan
Max Reitz Oct. 18, 2013, 6:17 p.m. UTC | #4
On 2013-10-17 08:28, Fam Zheng wrote:
> On Wed, 10/16 20:45, Max Reitz wrote:
>> On 2013-10-15 04:41, Fam Zheng wrote:
>>> If the block job completes too fast, the test can fail. Change the
>>> numbers so the qmp events are more stably captured by the script.
>>>
>>> A sleep is removed for the same reason.
>>>
>>> Signed-off-by: Fam Zheng <famz@redhat.com>
>>> ---
>>>   tests/qemu-iotests/030 | 11 +++++------
>>>   1 file changed, 5 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
>>> index ae56f3b..188b182 100755
>>> --- a/tests/qemu-iotests/030
>>> +++ b/tests/qemu-iotests/030
>>> @@ -403,14 +403,13 @@ class TestStreamStop(iotests.QMPTestCase):
>>>           result = self.vm.qmp('block-stream', device='drive0')
>>>           self.assert_qmp(result, 'return', {})
>>> -        time.sleep(0.1)
>> Hm, I'm not sure whether removing the sleep actually removes the
>> underlying race condition… It should work in most cases and the
>> foreseeable future, though.
>>
>>>           events = self.vm.get_qmp_events(wait=False)
>>>           self.assertEqual(events, [], 'unexpected QMP event: %s' % events)
>>>           self.cancel_and_wait()
>>>   class TestSetSpeed(iotests.QMPTestCase):
>>> -    image_len = 80 * 1024 * 1024 # MB
>>> +    image_len = 512 * 1024 * 1024 # MB
>>>       def setUp(self):
>>>           qemu_img('create', backing_img, str(TestSetSpeed.image_len))
>>> @@ -457,23 +456,23 @@ class TestSetSpeed(iotests.QMPTestCase):
>>>           self.assert_qmp(result, 'return[0]/device', 'drive0')
>>>           self.assert_qmp(result, 'return[0]/speed', 0)
>>> -        result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 1024 * 1024)
>>> +        result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 1024)
>> So the limit was already 8 MB/s? Doesn't this mean that the job
>> should have taken 10 seconds anyway? Sounds to me like the block job
>> speed is basically disregarded anyway.
> No, see below...
>
>> If I re-add the sleep you removed in this patch, this test fails
>> again for me. This further suggests block-job-set-speed to be kind
>> of a no-op and the changes concerning the image length and the block
>> job speed not really contributing to fixing the test.
>>
>> So I think removing the sleep is all that would have to be done
>> right now. OTOH, this is not really a permanent fix, either (the
>> fundamental race condition remains). Furthermore, I guess there is
>> some reason for having a sleep there - else it would not exist in
>> the first place (and it apparently already caused problems some time
>> ago which were "fixed" by replacing the previous "sleep(1)" by
>> "sleep(0.1)").
>>
>> All in all, if someone can assure me that the sleep in question is
>> unnecessary, I think removing it is all that's needed.
>>
>> Max
>>
> Both failure cases are simply that setting the speed or checking the status
> comes too late: the streaming finishes, or comes close to finishing, in
> negligible time once the job is started. In other words, dropping the speed
> change and only increasing image_len and removing the sleep fixes it for me too.

Ah, sorry, I missed that those failures are two separate test cases and
both changes are basically independent of each other. Sorry, my fault.

Hm, well, but I'm still not happy with removing the sleep. I've thought 
of a different solution myself and didn't find any either… But the fact 
remains that there are three things that can happen:

First, the block job might finish before the cancelling QMP command gets 
sent anyway. The test script and qemu are independent of each other, so 
this may still happen (although the block device has to be really fast 
for that to happen, I guess).

Second, I'd still like an explanation why the sleep is indeed 
unnecessary. I guess its purpose is to have the block job actually 
running before cancelling it – removing the sleep might defeat that 
purpose, though I don't know how bad this is.

Third, since qemu is indeed running independently of the test script, 
the blockjob is in fact running and has not yet finished by the time it 
gets cancelled. This would be the desired result.

I admit that the first outcome is impossible for all realistic 
scenarios. However, the second one is what's making me feel uncomfortable.

Max
Fam Zheng Oct. 30, 2013, 11:45 a.m. UTC | #5
On Thu, 10/17 14:38, Stefan Hajnoczi wrote:
> On Tue, Oct 15, 2013 at 10:41:51AM +0800, Fam Zheng wrote:
> > If the block job completes too fast, the test can fail. Change the
> > numbers so the qmp events are more stably captured by the script.
> > 
> > A sleep is removed for the same reason.
> > 
> > Signed-off-by: Fam Zheng <famz@redhat.com>
> > ---
> >  tests/qemu-iotests/030 | 11 +++++------
> >  1 file changed, 5 insertions(+), 6 deletions(-)
> 
> Can you try using the blkdebug suspend command to pause I/O?  That way
> the test can be made reliable.

I'll give it a try. The cancel_and_wait will need to be replaced by
cancel_resume_wait then.

Thanks,

Fam
diff mbox

Patch

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index ae56f3b..188b182 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -403,14 +403,13 @@  class TestStreamStop(iotests.QMPTestCase):
         result = self.vm.qmp('block-stream', device='drive0')
         self.assert_qmp(result, 'return', {})
 
-        time.sleep(0.1)
         events = self.vm.get_qmp_events(wait=False)
         self.assertEqual(events, [], 'unexpected QMP event: %s' % events)
 
         self.cancel_and_wait()
 
 class TestSetSpeed(iotests.QMPTestCase):
-    image_len = 80 * 1024 * 1024 # MB
+    image_len = 512 * 1024 * 1024 # MB
 
     def setUp(self):
         qemu_img('create', backing_img, str(TestSetSpeed.image_len))
@@ -457,23 +456,23 @@  class TestSetSpeed(iotests.QMPTestCase):
         self.assert_qmp(result, 'return[0]/device', 'drive0')
         self.assert_qmp(result, 'return[0]/speed', 0)
 
-        result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 1024 * 1024)
+        result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 1024)
         self.assert_qmp(result, 'return', {})
 
         # Ensure the speed we set was accepted
         result = self.vm.qmp('query-block-jobs')
         self.assert_qmp(result, 'return[0]/device', 'drive0')
-        self.assert_qmp(result, 'return[0]/speed', 8 * 1024 * 1024)
+        self.assert_qmp(result, 'return[0]/speed', 8 * 1024)
 
         self.cancel_and_wait()
 
         # Check setting speed in block-stream works
-        result = self.vm.qmp('block-stream', device='drive0', speed=4 * 1024 * 1024)
+        result = self.vm.qmp('block-stream', device='drive0', speed=4 * 1024)
         self.assert_qmp(result, 'return', {})
 
         result = self.vm.qmp('query-block-jobs')
         self.assert_qmp(result, 'return[0]/device', 'drive0')
-        self.assert_qmp(result, 'return[0]/speed', 4 * 1024 * 1024)
+        self.assert_qmp(result, 'return[0]/speed', 4 * 1024)
 
         self.cancel_and_wait()