block/throttle: Use host clock type

Message ID: 1426228529-15969-1-git-send-email-famz@redhat.com
State: New

Commit Message

Fam Zheng March 13, 2015, 6:35 a.m. UTC
Throttle timers won't make any progress when the VCPU is not running,
which is prone to stalling the request queue in cases like utils, qtest,
suspending, and live migration, unless carefully handled. What we do now
is crude. For example, in bdrv_drain_all, requests are resumed
immediately without consulting the throttling timer. Unfortunately,
bdrv_drain_all is so widely used that there may be too many holes
through which the guest could bypass throttling.

If we use the host clock, we can just trust the nested poll when waiting
for requests.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block.c               |  2 +-
 tests/test-throttle.c | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

Comments

Paolo Bonzini March 13, 2015, 8:08 a.m. UTC | #1
On 13/03/2015 07:35, Fam Zheng wrote:
> Throttle timers won't make any progress when the VCPU is not running,
> which is prone to stalling the request queue in cases like utils, qtest,
> suspending, and live migration, unless carefully handled. What we do now
> is crude. For example, in bdrv_drain_all, requests are resumed
> immediately without consulting the throttling timer. Unfortunately,
> bdrv_drain_all is so widely used that there may be too many holes
> through which the guest could bypass throttling.
> 
> If we use the host clock, we can just trust the nested poll when waiting
> for requests.
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  block.c               |  2 +-
>  tests/test-throttle.c | 14 +++++++-------

I think test-throttle.c should use the vm_clock.  At some point it was
managing the clock manually (by overriding cpu_get_clock from
libqemustub.a), and that's only possible with QEMU_CLOCK_VIRTUAL.
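
As an illustration of that technique: a test can provide its own
cpu_get_clock() to displace the weak stub from libqemustub.a and advance
QEMU_CLOCK_VIRTUAL by hand. A minimal sketch follows; cpu_get_clock() and
qemu_clock_run_timers() are QEMU APIs of the time, while the helper
advance_virtual_clock() is purely hypothetical:

    #include "qemu/timer.h"

    static int64_t fake_clock_ns;

    /* Overrides the stub from libqemustub.a; QEMU_CLOCK_VIRTUAL reads
     * its current value through cpu_get_clock(). */
    int64_t cpu_get_clock(void)
    {
        return fake_clock_ns;
    }

    /* Step the virtual clock forward and fire any timers now due. */
    static void advance_virtual_clock(int64_t ns)
    {
        fake_clock_ns += ns;
        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
    }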

As to block.c, I'll leave the review to the block folks.  But I think
QEMU_CLOCK_REALTIME is preferable.

>  2 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 191a847..11f9065 100644
> --- a/block.c
> +++ b/block.c
> @@ -184,7 +184,7 @@ void bdrv_io_limits_enable(BlockDriverState *bs)
>      assert(!bs->io_limits_enabled);
>      throttle_init(&bs->throttle_state,
>                    bdrv_get_aio_context(bs),
> -                  QEMU_CLOCK_VIRTUAL,
> +                  QEMU_CLOCK_HOST,
>                    bdrv_throttle_read_timer_cb,
>                    bdrv_throttle_write_timer_cb,
>                    bs);
> diff --git a/tests/test-throttle.c b/tests/test-throttle.c
> index d8ba415..1fb1792 100644
> --- a/tests/test-throttle.c
> +++ b/tests/test-throttle.c
> @@ -107,11 +107,11 @@ static void test_init(void)
>      memset(&ts, 1, sizeof(ts));
>  
>      /* init the structure */
> -    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
> +    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
>                    read_timer_cb, write_timer_cb, &ts);
>  
>      /* check initialized fields */
> -    g_assert(ts.clock_type == QEMU_CLOCK_VIRTUAL);
> +    g_assert(ts.clock_type == QEMU_CLOCK_HOST);
>      g_assert(ts.timers[0]);
>      g_assert(ts.timers[1]);
>  
> @@ -130,7 +130,7 @@ static void test_init(void)
>  static void test_destroy(void)
>  {
>      int i;
> -    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
> +    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
>                    read_timer_cb, write_timer_cb, &ts);
>      throttle_destroy(&ts);
>      for (i = 0; i < 2; i++) {
> @@ -170,7 +170,7 @@ static void test_config_functions(void)
>  
>      orig_cfg.op_size = 1;
>  
> -    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
> +    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
>                    read_timer_cb, write_timer_cb, &ts);
>      /* structure reset by throttle_init previous_leak should be null */
>      g_assert(!ts.previous_leak);
> @@ -330,7 +330,7 @@ static void test_have_timer(void)
>      g_assert(!throttle_have_timer(&ts));
>  
>      /* init the structure */
> -    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
> +    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
>                    read_timer_cb, write_timer_cb, &ts);
>  
>      /* timer set by init should return true */
> @@ -345,7 +345,7 @@ static void test_detach_attach(void)
>      memset(&ts, 0, sizeof(ts));
>  
>      /* init the structure */
> -    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
> +    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
>                    read_timer_cb, write_timer_cb, &ts);
>  
>      /* timer set by init should return true */
> @@ -387,7 +387,7 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
>  
>      cfg.op_size = op_size;
>  
> -    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
> +    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
>                    read_timer_cb, write_timer_cb, &ts);
>      throttle_config(&ts, &cfg);
>  
>
Fam Zheng March 13, 2015, 8:27 a.m. UTC | #2
On Fri, 03/13 09:08, Paolo Bonzini wrote:
> 
> 
> On 13/03/2015 07:35, Fam Zheng wrote:
> > Throttle timers won't make any progress when the VCPU is not running,
> > which is prone to stalling the request queue in cases like utils, qtest,
> > suspending, and live migration, unless carefully handled. What we do now
> > is crude. For example, in bdrv_drain_all, requests are resumed
> > immediately without consulting the throttling timer. Unfortunately,
> > bdrv_drain_all is so widely used that there may be too many holes
> > through which the guest could bypass throttling.
> > 
> > If we use the host clock, we can just trust the nested poll when waiting
> > for requests.
> > 
> > Signed-off-by: Fam Zheng <famz@redhat.com>
> > ---
> >  block.c               |  2 +-
> >  tests/test-throttle.c | 14 +++++++-------
> 
> I think test-throttle.c should use the vm_clock.  At some point it was
> managing the clock manually (by overriding cpu_get_clock from
> libqemustub.a), and that's only possible with QEMU_CLOCK_VIRTUAL.

Ah! That is in iotests 093 (hint: authored by Fam Zheng :-/), which WILL be
complicated if block.c switches away from QEMU_CLOCK_VIRTUAL. But I'll do the
work if we decide to make this change.

As to tests/test-throttle.c, I don't see any dependency on the clock type
there, so either way should work, and I don't mind keeping it as-is.

> 
> As to block.c, I'll leave the review to the block folks.  But I think
> QEMU_CLOCK_REALTIME is preferable.

The real-time clock should be fine, but we should review whether the code
handles the clock going backwards.
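
For instance, the leak computation would need a guard along these lines
(an illustrative sketch only; the names are not from util/throttle.c):

    /* Clamp the elapsed interval so a wall-clock step backwards
     * (e.g. an NTP adjustment) cannot produce a negative delta. */
    static int64_t elapsed_ns(int64_t now, int64_t *previous)
    {
        int64_t delta = now - *previous;

        *previous = now;
        return delta < 0 ? 0 : delta;
    }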

Fam

> 
> >  2 files changed, 8 insertions(+), 8 deletions(-)
> > 
> > diff --git a/block.c b/block.c
> > index 191a847..11f9065 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -184,7 +184,7 @@ void bdrv_io_limits_enable(BlockDriverState *bs)
> >      assert(!bs->io_limits_enabled);
> >      throttle_init(&bs->throttle_state,
> >                    bdrv_get_aio_context(bs),
> > -                  QEMU_CLOCK_VIRTUAL,
> > +                  QEMU_CLOCK_HOST,
> >                    bdrv_throttle_read_timer_cb,
> >                    bdrv_throttle_write_timer_cb,
> >                    bs);
> > diff --git a/tests/test-throttle.c b/tests/test-throttle.c
> > index d8ba415..1fb1792 100644
> > --- a/tests/test-throttle.c
> > +++ b/tests/test-throttle.c
> > @@ -107,11 +107,11 @@ static void test_init(void)
> >      memset(&ts, 1, sizeof(ts));
> >  
> >      /* init the structure */
> > -    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
> > +    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
> >                    read_timer_cb, write_timer_cb, &ts);
> >  
> >      /* check initialized fields */
> > -    g_assert(ts.clock_type == QEMU_CLOCK_VIRTUAL);
> > +    g_assert(ts.clock_type == QEMU_CLOCK_HOST);
> >      g_assert(ts.timers[0]);
> >      g_assert(ts.timers[1]);
> >  
> > @@ -130,7 +130,7 @@ static void test_init(void)
> >  static void test_destroy(void)
> >  {
> >      int i;
> > -    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
> > +    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
> >                    read_timer_cb, write_timer_cb, &ts);
> >      throttle_destroy(&ts);
> >      for (i = 0; i < 2; i++) {
> > @@ -170,7 +170,7 @@ static void test_config_functions(void)
> >  
> >      orig_cfg.op_size = 1;
> >  
> > -    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
> > +    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
> >                    read_timer_cb, write_timer_cb, &ts);
> >      /* structure reset by throttle_init previous_leak should be null */
> >      g_assert(!ts.previous_leak);
> > @@ -330,7 +330,7 @@ static void test_have_timer(void)
> >      g_assert(!throttle_have_timer(&ts));
> >  
> >      /* init the structure */
> > -    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
> > +    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
> >                    read_timer_cb, write_timer_cb, &ts);
> >  
> >      /* timer set by init should return true */
> > @@ -345,7 +345,7 @@ static void test_detach_attach(void)
> >      memset(&ts, 0, sizeof(ts));
> >  
> >      /* init the structure */
> > -    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
> > +    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
> >                    read_timer_cb, write_timer_cb, &ts);
> >  
> >      /* timer set by init should return true */
> > @@ -387,7 +387,7 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
> >  
> >      cfg.op_size = op_size;
> >  
> > -    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
> > +    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
> >                    read_timer_cb, write_timer_cb, &ts);
> >      throttle_config(&ts, &cfg);
> >  
> >
Paolo Bonzini March 13, 2015, 11:09 a.m. UTC | #3
On 13/03/2015 09:27, Fam Zheng wrote:
> On Fri, 03/13 09:08, Paolo Bonzini wrote:
>>
>>
>> On 13/03/2015 07:35, Fam Zheng wrote:
>>> Throttle timers won't make any progress when the VCPU is not running,
>>> which is prone to stalling the request queue in cases like utils, qtest,
>>> suspending, and live migration, unless carefully handled. What we do now
>>> is crude. For example, in bdrv_drain_all, requests are resumed
>>> immediately without consulting the throttling timer. Unfortunately,
>>> bdrv_drain_all is so widely used that there may be too many holes
>>> through which the guest could bypass throttling.
>>>
>>> If we use the host clock, we can just trust the nested poll when waiting
>>> for requests.
>>>
>>> Signed-off-by: Fam Zheng <famz@redhat.com>
>>> ---
>>>  block.c               |  2 +-
>>>  tests/test-throttle.c | 14 +++++++-------
>>
>> I think test-throttle.c should use the vm_clock.  At some point it was
>> managing the clock manually (by overriding cpu_get_clock from
>> libqemustub.a), and that's only possible with QEMU_CLOCK_VIRTUAL.
> 
> Ah! That is in iotests 093 (hint: authored by Fam Zheng :-/), which WILL be
> complicated if block.c switches away from QEMU_CLOCK_VIRTUAL. But I'll do the
> work if we decide to make this change.
> 
> As to tests/test-throttle.c, I don't see any dependency on the clock type
> there, so either way should work, and I don't mind keeping it as-is.

If there's another way to do the same thing, I'd prefer it.

For example, can we call bdrv_drain_all() at the beginning of
do_vm_stop, before pausing the VCPUs?
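
That is, something along these lines (a sketch of the suggestion; the
real do_vm_stop() in cpus.c differs in detail):

    static int do_vm_stop(RunState state)
    {
        if (runstate_is_running()) {
            /* Drain while the VCPUs, and therefore QEMU_CLOCK_VIRTUAL,
             * are still ticking, so the throttle timers can fire. */
            bdrv_drain_all();
            pause_all_vcpus();
            runstate_set(state);
            vm_state_notify(0, state);
        }
        return bdrv_flush_all();
    }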

>> As to block.c, I'll leave the review to the block folks.  But I think
>> QEMU_CLOCK_REALTIME is preferable.
> 
> The real-time clock should be fine, but we should review whether the code
> handles the clock going backwards.

QEMU_CLOCK_HOST is the one that follows the wall clock;
QEMU_CLOCK_REALTIME is monotonic. :)
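
In POSIX terms the split is roughly the following (a simplified
illustration; the wrapper names are invented, and QEMU's actual mapping
lives in util/qemu-timer.c):

    #include <stdint.h>
    #include <time.h>

    static int64_t get_ns(clockid_t id)
    {
        struct timespec ts;

        clock_gettime(id, &ts);
        return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
    }

    /* ~QEMU_CLOCK_HOST: wall clock; can jump backwards on
     * settimeofday() or NTP steps. */
    int64_t host_clock_ns(void)
    {
        return get_ns(CLOCK_REALTIME);
    }

    /* ~QEMU_CLOCK_REALTIME: monotonic, despite the name. */
    int64_t realtime_clock_ns(void)
    {
        return get_ns(CLOCK_MONOTONIC);
    }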

Paolo
Fam Zheng March 13, 2015, 11:43 a.m. UTC | #4
On Fri, 03/13 12:09, Paolo Bonzini wrote:
> 
> 
> On 13/03/2015 09:27, Fam Zheng wrote:
> > On Fri, 03/13 09:08, Paolo Bonzini wrote:
> >>
> >>
> >> On 13/03/2015 07:35, Fam Zheng wrote:
> >>> Throttle timers won't make any progress when the VCPU is not running,
> >>> which is prone to stalling the request queue in cases like utils, qtest,
> >>> suspending, and live migration, unless carefully handled. What we do now
> >>> is crude. For example, in bdrv_drain_all, requests are resumed
> >>> immediately without consulting the throttling timer. Unfortunately,
> >>> bdrv_drain_all is so widely used that there may be too many holes
> >>> through which the guest could bypass throttling.
> >>>
> >>> If we use the host clock, we can just trust the nested poll when waiting
> >>> for requests.
> >>>
> >>> Signed-off-by: Fam Zheng <famz@redhat.com>
> >>> ---
> >>>  block.c               |  2 +-
> >>>  tests/test-throttle.c | 14 +++++++-------
> >>
> >> I think test-throttle.c should use the vm_clock.  At some point it was
> >> managing the clock manually (by overriding cpu_get_clock from
> >> libqemustub.a), and that's only possible with QEMU_CLOCK_VIRTUAL.
> > 
> > Ah! That is in iotests 093 (hint: authored by Fam Zheng :-/), which WILL be
> > complicated if block.c switches away from QEMU_CLOCK_VIRTUAL. But I'll do the
> > work if we decide to make this change.
> > 
> > As to tests/test-throttle.c, I don't see any dependency on the clock type
> > there, so either way should work, and I don't mind keeping it as-is.
> 
> If there's another way to do the same thing, I'd prefer it.
> 
> For example, can we call bdrv_drain_all() at the beginning of
> do_vm_stop, before pausing the VCPUs?

Even with that, I still don't understand why block jobs should stop making
progress together with the VCPUs.

IMO following the host clock is the right thing to do, because in the I/O
throttling context we are mostly referring to host resources
(host_BW = host_IO / host_time).
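
In other words, the bucket leaks against host time. A minimal
leaky-bucket sketch of that idea (illustrative only; Bucket and
must_wait() are invented names, not the code in util/throttle.c):

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct {
        double level;        /* bytes currently queued in the bucket */
        double bps_limit;    /* allowed bytes per second of host time */
        int64_t prev_ns;     /* host clock at the previous check */
    } Bucket;

    /* Account for 'bytes' and report whether the request must wait. */
    static bool must_wait(Bucket *b, int64_t host_now_ns, uint64_t bytes)
    {
        double elapsed_s = (host_now_ns - b->prev_ns) / 1e9;

        b->prev_ns = host_now_ns;
        b->level -= elapsed_s * b->bps_limit;    /* leak at host_BW */
        if (b->level < 0) {
            b->level = 0;
        }
        b->level += bytes;
        return b->level > b->bps_limit;   /* over one second's budget */
    }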

> 
> >> As to block.c, I'll leave the review to the block folks.  But I think
> >> QEMU_CLOCK_REALTIME is preferable.
> > 
> > The real-time clock should be fine, but we should review whether the code
> > handles the clock going backwards.
> 
> QEMU_CLOCK_HOST is the one that follows the wall clock;
> QEMU_CLOCK_REALTIME is monotonic. :)

I totally misread :)

Fam
Alberto Garcia March 13, 2015, 12:23 p.m. UTC | #5
On Fri, Mar 13, 2015 at 02:35:29PM +0800, Fam Zheng wrote:

> Throttle timers won't make any progress when the VCPU is not running,
> which is prone to stalling the request queue in cases like utils,
> qtest, suspending, and live migration, unless carefully handled.

Yes, this can be easily reproduced by stopping the VM and starting a
block-commit job. If the I/O on that device is throttled, then the job
will be stalled.

Then there's also the situation that we discussed on IRC: if a
block-commit job is ongoing and we then stop the VM, the rest of
the data will be committed, bypassing the throttling settings.  But
that's not related to these changes.

> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  block.c               |  2 +-
>  tests/test-throttle.c | 14 +++++++-------
>  2 files changed, 8 insertions(+), 8 deletions(-)

Reviewed-By: Alberto Garcia <berto@igalia.com>

Berto
Paolo Bonzini March 13, 2015, 12:28 p.m. UTC | #6
On 13/03/2015 13:23, Alberto Garcia wrote:
> On Fri, Mar 13, 2015 at 02:35:29PM +0800, Fam Zheng wrote:
> 
>> Throttle timers won't make any progress when the VCPU is not running,
>> which is prone to stalling the request queue in cases like utils,
>> qtest, suspending, and live migration, unless carefully handled.
> 
> Yes, this can be easily reproduced by stopping the VM and starting a
> block-commit job. If the I/O on that device is throttled, then the job
> will be stalled.

That may be a different bug.  Should jobs be subject to throttling at all?

Paolo

> Then there's also the situation that we discussed on IRC: if a
> block-commit job is ongoing and we then stop the VM, the rest of
> the data will be committed, bypassing the throttling settings.  But
> that's not related to these changes.
> 
>> Signed-off-by: Fam Zheng <famz@redhat.com>
>> ---
>>  block.c               |  2 +-
>>  tests/test-throttle.c | 14 +++++++-------
>>  2 files changed, 8 insertions(+), 8 deletions(-)
> 
> Reviewed-By: Alberto Garcia <berto@igalia.com>
> 
> Berto
> 
>
Fam Zheng March 16, 2015, 2:15 a.m. UTC | #7
On Fri, 03/13 13:28, Paolo Bonzini wrote:
> 
> 
> On 13/03/2015 13:23, Alberto Garcia wrote:
> > On Fri, Mar 13, 2015 at 02:35:29PM +0800, Fam Zheng wrote:
> > 
> >> Throttle timers won't make any progress when the VCPU is not running,
> >> which is prone to stalling the request queue in cases like utils,
> >> qtest, suspending, and live migration, unless carefully handled.
> > 
> > Yes, this can be easily reproduced by stopping the VM and starting a
> > block-commit job. If the I/O on that device is throttled, then the job
> > will be stalled.
> 
> That may be a different bug.  Should jobs be subject to throttling at all?

You are asking about the other bug, quoted below, right?

I lean towards yes, but the problem is that we are currently somewhere
between yes and no: before "stop" the job is throttled, but upon "stop"
it is drained right to the end, unthrottled.

Fam

> 
> Paolo
> 
> > Then there's also the situation that we discussed on IRC: if a
> > block-commit job is ongoing and we then stop the VM, the rest of
> > the data will be committed, bypassing the throttling settings.  But
> > that's not related to these changes.
> > 
> >> Signed-off-by: Fam Zheng <famz@redhat.com>
> >> ---
> >>  block.c               |  2 +-
> >>  tests/test-throttle.c | 14 +++++++-------
> >>  2 files changed, 8 insertions(+), 8 deletions(-)
> > 
> > Reviewed-By: Alberto Garcia <berto@igalia.com>
> > 
> > Berto
> > 
> > 
>
Kevin Wolf March 16, 2015, 3:18 p.m. UTC | #8
On 13.03.2015 at 13:23, Alberto Garcia wrote:
> On Fri, Mar 13, 2015 at 02:35:29PM +0800, Fam Zheng wrote:
> 
> > Throttle timers won't make any progress when the VCPU is not running,
> > which is prone to stalling the request queue in cases like utils,
> > qtest, suspending, and live migration, unless carefully handled.
> 
> Yes, this can be easily reproduced by stopping the VM and starting a
> block-commit job. If the I/O on that device is throttled, then the job
> will be stalled.
> 
> Then there's also the situation that we discussed on IRC: if a
> block-commit job is ongoing and we then stop the VM, the rest of
> the data will be committed, bypassing the throttling settings.  But
> that's not related to these changes.
> 
> > Signed-off-by: Fam Zheng <famz@redhat.com>
> > ---
> >  block.c               |  2 +-
> >  tests/test-throttle.c | 14 +++++++-------
> >  2 files changed, 8 insertions(+), 8 deletions(-)
> 
> Reviewed-By: Alberto Garcia <berto@igalia.com>

Thanks, applied to the block branch.

Kevin
Kevin Wolf March 16, 2015, 4:08 p.m. UTC | #9
On 13.03.2015 at 07:35, Fam Zheng wrote:
> Throttle timers won't make any progress when the VCPU is not running,
> which is prone to stalling the request queue in cases like utils, qtest,
> suspending, and live migration, unless carefully handled. What we do now
> is crude. For example, in bdrv_drain_all, requests are resumed
> immediately without consulting the throttling timer. Unfortunately,
> bdrv_drain_all is so widely used that there may be too many holes
> through which the guest could bypass throttling.
> 
> If we use the host clock, we can just trust the nested poll when waiting
> for requests.
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>

This breaks qemu-iotests 093. Dropped it again from the block branch.

Kevin

Patch

diff --git a/block.c b/block.c
index 191a847..11f9065 100644
--- a/block.c
+++ b/block.c
@@ -184,7 +184,7 @@  void bdrv_io_limits_enable(BlockDriverState *bs)
     assert(!bs->io_limits_enabled);
     throttle_init(&bs->throttle_state,
                   bdrv_get_aio_context(bs),
-                  QEMU_CLOCK_VIRTUAL,
+                  QEMU_CLOCK_HOST,
                   bdrv_throttle_read_timer_cb,
                   bdrv_throttle_write_timer_cb,
                   bs);
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index d8ba415..1fb1792 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -107,11 +107,11 @@  static void test_init(void)
     memset(&ts, 1, sizeof(ts));
 
     /* init the structure */
-    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
                   read_timer_cb, write_timer_cb, &ts);
 
     /* check initialized fields */
-    g_assert(ts.clock_type == QEMU_CLOCK_VIRTUAL);
+    g_assert(ts.clock_type == QEMU_CLOCK_HOST);
     g_assert(ts.timers[0]);
     g_assert(ts.timers[1]);
 
@@ -130,7 +130,7 @@  static void test_init(void)
 static void test_destroy(void)
 {
     int i;
-    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
                   read_timer_cb, write_timer_cb, &ts);
     throttle_destroy(&ts);
     for (i = 0; i < 2; i++) {
@@ -170,7 +170,7 @@  static void test_config_functions(void)
 
     orig_cfg.op_size = 1;
 
-    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
                   read_timer_cb, write_timer_cb, &ts);
     /* structure reset by throttle_init previous_leak should be null */
     g_assert(!ts.previous_leak);
@@ -330,7 +330,7 @@  static void test_have_timer(void)
     g_assert(!throttle_have_timer(&ts));
 
     /* init the structure */
-    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
                   read_timer_cb, write_timer_cb, &ts);
 
     /* timer set by init should return true */
@@ -345,7 +345,7 @@  static void test_detach_attach(void)
     memset(&ts, 0, sizeof(ts));
 
     /* init the structure */
-    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
                   read_timer_cb, write_timer_cb, &ts);
 
     /* timer set by init should return true */
@@ -387,7 +387,7 @@  static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
 
     cfg.op_size = op_size;
 
-    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+    throttle_init(&ts, ctx, QEMU_CLOCK_HOST,
                   read_timer_cb, write_timer_cb, &ts);
     throttle_config(&ts, &cfg);