diff mbox

[v3] fsdev: add IO throttle support to fsdev devices

Message ID 1474014817-41572-1-git-send-email-pradeep.jagadeesh@huawei.com
State New
Headers show

Commit Message

Pradeep Jagadeesh Sept. 16, 2016, 8:33 a.m. UTC
Uses throttling APIs to limit I/O bandwidth and number of operations on the 
fsdev devices.

Signed-off-by: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
---
 fsdev/Makefile.objs         |   1 +
 fsdev/file-op-9p.h          |   3 +
 fsdev/qemu-fsdev-opts.c     |  76 ++++++++++++++++++
 fsdev/qemu-fsdev-throttle.c | 191 ++++++++++++++++++++++++++++++++++++++++++++
 fsdev/qemu-fsdev-throttle.h |  42 ++++++++++
 hw/9pfs/9p-local.c          |  11 ++-
 hw/9pfs/9p.c                |   7 ++
 hw/9pfs/cofile.c            |   3 +
 8 files changed, 332 insertions(+), 2 deletions(-)
 create mode 100644 fsdev/qemu-fsdev-throttle.c
 create mode 100644 fsdev/qemu-fsdev-throttle.h

This adds the support for the 9p-local driver.
For now this functionality can be enabled only through qemu cli options.
QMP interface and support to other drivers need further extensions.
To make it simple for other drivers, the throttle code has been put in
separate files.

v1 -> v2:

-Fixed FsContext redeclaration issue
-Removed couple of function declarations from 9p-throttle.h
-Fixed some of the .help messages

v2 -> v3:

-Addressed follwing comments by Claudio Fontana
 -Removed redundant memset calls in fsdev_throttle_configure_iolimits function
 -Checking throttle structure validity before initializing other structures
  in fsdev_throttle_configure_iolimits

-Addressed following comments by Greg Kurz
 -Moved the code from 9pfs directory to fsdev directory, because the throttling
  is for the fsdev devices.Renamed the files and functions to fsdev_ from 9pfs_
 -Renamed throttling cli options to throttling.*, as in QMP cli options
 -Removed some of the unwanted .h files from qemu-fsdev-throttle.[ch]
 -Using throttle_enabled() function to set the thottle enabled flag for fsdev.

Comments

Alberto Garcia Sept. 19, 2016, 3:41 p.m. UTC | #1
On Fri 16 Sep 2016 10:33:36 AM CEST, Pradeep Jagadeesh wrote:

Hi,

first of all, sorry for the late reply! Here are my comments:

> --- a/fsdev/qemu-fsdev-opts.c
> +++ b/fsdev/qemu-fsdev-opts.c
> @@ -37,6 +37,82 @@ static QemuOptsList qemu_fsdev_opts = {
>          }, {
>              .name = "sock_fd",
>              .type = QEMU_OPT_NUMBER,
> +        }, {
> +            .name = "throttling.iops-total",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit total I/O operations per second",
  /*...*/
> +        },{
> +            .name = "throttling.iops-size",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "when limiting by iops max size of an I/O in bytes",

It would be nice if we could factor these so we don't have to have them
twice in the code. I think it should be doable with qemu_opts_append().

It can be done later in a separate patch if you prefer.

> +typedef struct FsThrottle {
> +    ThrottleState ts;
> +    ThrottleTimers tt;
> +    AioContext   *aioctx;
> +    ThrottleConfig cfg;
> +    bool enabled;
> +    CoQueue      throttled_reqs[2];
> +    unsigned     pending_reqs[2];
> +    bool any_timer_armed[2];
> +    QemuMutex lock;
> +} FsThrottle;

You based your implementation on block/throttle-groups.c. I think yours
can be made simpler because one of the problems with mine is that it
needs to support multiple parallel I/O operations on the same throttling
group, and that's why the locking rules are more complex. With a single
user per ThrottleConfig this is not necessary.

Have you checked if you really need them? My impression is that you
might be able to get rid of the 'lock', 'any_timer_armed' and
'pending_reqs' fields.

Please check commit 76f4afb40fa076ed23fe0ab42c7a768ddb71123f to see how
was the transition from single-drive throttling to group throttling,
specifically the bdrv_io_limits_intercept() function. You will see that
it was simpler.

> @@ -436,8 +436,8 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
>                               const struct iovec *iov,
>                               int iovcnt, off_t offset)
>  {
> -    ssize_t ret
> -;
> +    ssize_t ret;
> +

This could go to a separate patch, but I'm fine if you include it in
this one.

> @@ -1213,6 +1213,8 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>      const char *sec_model = qemu_opt_get(opts, "security_model");
>      const char *path = qemu_opt_get(opts, "path");
>  
> +    FsThrottle *fst = &fse->fst;
> +

I'd remove the empty line between the declarations of 'path' and 'fst',
however...

>      if (!sec_model) {
>          error_report("Security model not specified, local fs needs security model");
>          error_printf("valid options are:"
> @@ -1240,6 +1242,11 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>          error_report("fsdev: No path specified");
>          return -1;
>      }
> +
> +    if (fsdev_throttle_configure_iolimits(opts, fst) < 0) {
> +        return -1;
> +    }

...you don't need to declare fst here, you could also pass &fse->fst
directly.

Berto
Pradeep Jagadeesh Sept. 20, 2016, 9:02 a.m. UTC | #2
Hi Alberto,

Thanks for having look at the patch.
My replies are inline.
> On Fri 16 Sep 2016 10:33:36 AM CEST, Pradeep Jagadeesh wrote:
>
> Hi,
>
> first of all, sorry for the late reply! Here are my comments:
No problem!
>
>> --- a/fsdev/qemu-fsdev-opts.c
>> +++ b/fsdev/qemu-fsdev-opts.c
>> @@ -37,6 +37,82 @@ static QemuOptsList qemu_fsdev_opts = {
>>          }, {
>>              .name = "sock_fd",
>>              .type = QEMU_OPT_NUMBER,
>> +        }, {
>> +            .name = "throttling.iops-total",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit total I/O operations per second",
>   /*...*/
>> +        },{
>> +            .name = "throttling.iops-size",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "when limiting by iops max size of an I/O in bytes",
>
> It would be nice if we could factor these so we don't have to have them
> twice in the code. I think it should be doable with qemu_opts_append().
>
> It can be done later in a separate patch if you prefer.
OK,as of now I have just copied. We can rework and remove the redundant 
code later.
>
>> +typedef struct FsThrottle {
>> +    ThrottleState ts;
>> +    ThrottleTimers tt;
>> +    AioContext   *aioctx;
>> +    ThrottleConfig cfg;
>> +    bool enabled;
>> +    CoQueue      throttled_reqs[2];
>> +    unsigned     pending_reqs[2];
>> +    bool any_timer_armed[2];
>> +    QemuMutex lock;
>> +} FsThrottle;
>
> You based your implementation on block/throttle-groups.c. I think yours
> can be made simpler because one of the problems with mine is that it
> needs to support multiple parallel I/O operations on the same throttling
> group, and that's why the locking rules are more complex. With a single
> user per ThrottleConfig this is not necessary.
>
> Have you checked if you really need them? My impression is that you
> might be able to get rid of the 'lock', 'any_timer_armed' and
> 'pending_reqs' fields.
>
> Please check commit 76f4afb40fa076ed23fe0ab42c7a768ddb71123f to see how
> was the transition from single-drive throttling to group throttling,
> specifically the bdrv_io_limits_intercept() function. You will see that
> it was simpler.
OK,I am not sure,I will have to try. I will look into this and let you know.
>
>> @@ -436,8 +436,8 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
>>                               const struct iovec *iov,
>>                               int iovcnt, off_t offset)
>>  {
>> -    ssize_t ret
>> -;
>> +    ssize_t ret;
>> +
>
> This could go to a separate patch, but I'm fine if you include it in
> this one.
I have already included as Greg suggested.
>
>> @@ -1213,6 +1213,8 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>>      const char *sec_model = qemu_opt_get(opts, "security_model");
>>      const char *path = qemu_opt_get(opts, "path");
>>
>> +    FsThrottle *fst = &fse->fst;
>> +
>
> I'd remove the empty line between the declarations of 'path' and 'fst',
> however...
OK
>
>>      if (!sec_model) {
>>          error_report("Security model not specified, local fs needs security model");
>>          error_printf("valid options are:"
>> @@ -1240,6 +1242,11 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>>          error_report("fsdev: No path specified");
>>          return -1;
>>      }
>> +
>> +    if (fsdev_throttle_configure_iolimits(opts, fst) < 0) {
>> +        return -1;
>> +    }
>
> ...you don't need to declare fst here, you could also pass &fse->fst
> directly.
OK

-Pradeep

>
> Berto
>
Greg Kurz Sept. 20, 2016, 9:56 p.m. UTC | #3
On Tue, 20 Sep 2016 11:02:07 +0200
Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com> wrote:

> Hi Alberto,
> 

Hi,

> Thanks for having look at the patch.
> My replies are inline.
> > On Fri 16 Sep 2016 10:33:36 AM CEST, Pradeep Jagadeesh wrote:
> >
> > Hi,
> >
> > first of all, sorry for the late reply! Here are my comments:  
> No problem!
> >  
> >> --- a/fsdev/qemu-fsdev-opts.c
> >> +++ b/fsdev/qemu-fsdev-opts.c
> >> @@ -37,6 +37,82 @@ static QemuOptsList qemu_fsdev_opts = {
> >>          }, {
> >>              .name = "sock_fd",
> >>              .type = QEMU_OPT_NUMBER,
> >> +        }, {
> >> +            .name = "throttling.iops-total",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "limit total I/O operations per second",  
> >   /*...*/  
> >> +        },{
> >> +            .name = "throttling.iops-size",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "when limiting by iops max size of an I/O in bytes",  
> >
> > It would be nice if we could factor these so we don't have to have them
> > twice in the code. I think it should be doable with qemu_opts_append().
> >

Or maybe just push the list to a THROTTLE_COMMON_OPTS macro in a
include/qemu/throttle-options.h header file ?

> > It can be done later in a separate patch if you prefer.  
> OK,as of now I have just copied. We can rework and remove the redundant 
> code later.
> >  
> >> +typedef struct FsThrottle {
> >> +    ThrottleState ts;
> >> +    ThrottleTimers tt;
> >> +    AioContext   *aioctx;
> >> +    ThrottleConfig cfg;
> >> +    bool enabled;
> >> +    CoQueue      throttled_reqs[2];
> >> +    unsigned     pending_reqs[2];
> >> +    bool any_timer_armed[2];
> >> +    QemuMutex lock;
> >> +} FsThrottle;  
> >
> > You based your implementation on block/throttle-groups.c. I think yours
> > can be made simpler because one of the problems with mine is that it
> > needs to support multiple parallel I/O operations on the same throttling
> > group, and that's why the locking rules are more complex. With a single
> > user per ThrottleConfig this is not necessary.
> >
> > Have you checked if you really need them? My impression is that you
> > might be able to get rid of the 'lock', 'any_timer_armed' and
> > 'pending_reqs' fields.
> >
> > Please check commit 76f4afb40fa076ed23fe0ab42c7a768ddb71123f to see how
> > was the transition from single-drive throttling to group throttling,
> > specifically the bdrv_io_limits_intercept() function. You will see that
> > it was simpler.  
> OK,I am not sure,I will have to try. I will look into this and let you know.
> >  
> >> @@ -436,8 +436,8 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
> >>                               const struct iovec *iov,
> >>                               int iovcnt, off_t offset)
> >>  {
> >> -    ssize_t ret
> >> -;
> >> +    ssize_t ret;
> >> +  
> >
> > This could go to a separate patch, but I'm fine if you include it in
> > this one.  
> I have already included as Greg suggested.
> >  

Isn't Berto referring to the semicolon change ? That's fine for me anyway :)

> >> @@ -1213,6 +1213,8 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
> >>      const char *sec_model = qemu_opt_get(opts, "security_model");
> >>      const char *path = qemu_opt_get(opts, "path");
> >>
> >> +    FsThrottle *fst = &fse->fst;
> >> +  
> >
> > I'd remove the empty line between the declarations of 'path' and 'fst',
> > however...  
> OK
> >  
> >>      if (!sec_model) {
> >>          error_report("Security model not specified, local fs needs security model");
> >>          error_printf("valid options are:"
> >> @@ -1240,6 +1242,11 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
> >>          error_report("fsdev: No path specified");
> >>          return -1;
> >>      }
> >> +
> >> +    if (fsdev_throttle_configure_iolimits(opts, fst) < 0) {
> >> +        return -1;
> >> +    }  
> >
> > ...you don't need to declare fst here, you could also pass &fse->fst
> > directly.  
> OK
> 
> -Pradeep
> 
> >
> > Berto
> >  
> 

Cheers.

--
Greg
Greg Kurz Sept. 20, 2016, 10:15 p.m. UTC | #4
On Fri, 16 Sep 2016 04:33:36 -0400
Pradeep Jagadeesh <pradeepkiruvale@gmail.com> wrote:

> Uses throttling APIs to limit I/O bandwidth and number of operations on the 
> fsdev devices.
> 
> Signed-off-by: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
> ---

Hi Pradeep,

Please find some comments below.

>  fsdev/Makefile.objs         |   1 +
>  fsdev/file-op-9p.h          |   3 +
>  fsdev/qemu-fsdev-opts.c     |  76 ++++++++++++++++++
>  fsdev/qemu-fsdev-throttle.c | 191 ++++++++++++++++++++++++++++++++++++++++++++
>  fsdev/qemu-fsdev-throttle.h |  42 ++++++++++

Is the qemu- prefix really needed ?

>  hw/9pfs/9p-local.c          |  11 ++-
>  hw/9pfs/9p.c                |   7 ++
>  hw/9pfs/cofile.c            |   3 +
>  8 files changed, 332 insertions(+), 2 deletions(-)
>  create mode 100644 fsdev/qemu-fsdev-throttle.c
>  create mode 100644 fsdev/qemu-fsdev-throttle.h
> 
> This adds the support for the 9p-local driver.
> For now this functionality can be enabled only through qemu cli options.
> QMP interface and support to other drivers need further extensions.
> To make it simple for other drivers, the throttle code has been put in
> separate files.
> 
> v1 -> v2:
> 
> -Fixed FsContext redeclaration issue
> -Removed couple of function declarations from 9p-throttle.h
> -Fixed some of the .help messages
> 
> v2 -> v3:
> 
> -Addressed follwing comments by Claudio Fontana
>  -Removed redundant memset calls in fsdev_throttle_configure_iolimits function
>  -Checking throttle structure validity before initializing other structures
>   in fsdev_throttle_configure_iolimits
> 
> -Addressed following comments by Greg Kurz
>  -Moved the code from 9pfs directory to fsdev directory, because the throttling
>   is for the fsdev devices.Renamed the files and functions to fsdev_ from 9pfs_
>  -Renamed throttling cli options to throttling.*, as in QMP cli options
>  -Removed some of the unwanted .h files from qemu-fsdev-throttle.[ch]
>  -Using throttle_enabled() function to set the thottle enabled flag for fsdev.
> 
> diff --git a/fsdev/Makefile.objs b/fsdev/Makefile.objs
> index 1b120a4..2c6da2d 100644
> --- a/fsdev/Makefile.objs
> +++ b/fsdev/Makefile.objs
> @@ -7,6 +7,7 @@ common-obj-y = qemu-fsdev-dummy.o
>  endif
>  common-obj-y += qemu-fsdev-opts.o
>  
> +common-obj-y += qemu-fsdev-throttle.o
>  # Toplevel always builds this; targets without virtio will put it in
>  # common-obj-y
>  common-obj-$(CONFIG_ALL) += qemu-fsdev-dummy.o
> diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
> index 6db9fea..33fe822 100644
> --- a/fsdev/file-op-9p.h
> +++ b/fsdev/file-op-9p.h
> @@ -17,6 +17,7 @@
>  #include <dirent.h>
>  #include <utime.h>
>  #include <sys/vfs.h>
> +#include "qemu-fsdev-throttle.h"
>  
>  #define SM_LOCAL_MODE_BITS    0600
>  #define SM_LOCAL_DIR_MODE_BITS    0700
> @@ -74,6 +75,7 @@ typedef struct FsDriverEntry {
>      char *path;
>      int export_flags;
>      FileOperations *ops;
> +    FsThrottle fst;
>  } FsDriverEntry;
>  
>  typedef struct FsContext
> @@ -83,6 +85,7 @@ typedef struct FsContext
>      int export_flags;
>      struct xattr_operations **xops;
>      struct extended_ops exops;
> +    FsThrottle *fst;
>      /* fs driver specific data */
>      void *private;
>  } FsContext;
> diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c
> index 1dd8c7a..395d497 100644
> --- a/fsdev/qemu-fsdev-opts.c
> +++ b/fsdev/qemu-fsdev-opts.c
> @@ -37,6 +37,82 @@ static QemuOptsList qemu_fsdev_opts = {
>          }, {
>              .name = "sock_fd",
>              .type = QEMU_OPT_NUMBER,
> +        }, {
> +            .name = "throttling.iops-total",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit total I/O operations per second",
> +        },{
> +            .name = "throttling.iops-read",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit read operations per second",
> +        },{
> +            .name = "throttling.iops-write",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit write operations per second",
> +        },{
> +            .name = "throttling.bps-total",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit total bytes per second",
> +        },{
> +            .name = "throttling.bps-read",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit read bytes per second",
> +        },{
> +            .name = "throttling.bps-write",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit write bytes per second",
> +        },{
> +            .name = "throttling.iops-total-max",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "I/O operations burst",
> +        },{
> +            .name = "throttling.iops-read-max",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "I/O operations read burst",
> +        },{
> +            .name = "throttling.iops-write-max",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "I/O operations write burst",
> +        },{
> +            .name = "throttling.bps-total-max",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "total bytes burst",
> +        },{
> +            .name = "throttling.bps-read-max",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "total bytes read burst",
> +        },{
> +            .name = "throttling.bps-write-max",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "total bytes write burst",
> +        },{
> +            .name = "throttling.iops-total-max-length",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "length of the iops-total-max burst period, in seconds",
> +        },{
> +            .name = "throttling.iops-read-max-length",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "length of the iops-read-max burst period, in seconds",
> +        },{
> +            .name = "throttling.iops-write-max-length",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "length of the iops-write-max burst period, in seconds",
> +        },{
> +            .name = "throttling.bps-total-max-length",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "length of the bps-total-max burst period, in seconds",
> +        },{
> +            .name = "throttling.bps-read-max-length",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "length of the bps-read-max burst period, in seconds",
> +        },{
> +            .name = "throttling.bps-write-max-length",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "length of the bps-write-max burst period, in seconds",
> +        },{
> +            .name = "throttling.iops-size",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "when limiting by iops max size of an I/O in bytes",
>          },
>  
>          { /*End of list */ }
> diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c
> new file mode 100644
> index 0000000..0adccd1
> --- /dev/null
> +++ b/fsdev/qemu-fsdev-throttle.c
> @@ -0,0 +1,191 @@
> +/*
> + * Fsdev Throttle
> + *
> + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
> + *
> + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * (at your option) any later version.
> + *
> + * See the COPYING file in the top-level directory for details.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/error-report.h"
> +#include "qapi/error.h"
> +#include "qemu-fsdev-throttle.h"
> +
> +static bool fsdev_throttle_check_for_wait(FsThrottle *fst, bool is_write)
> +{
> +    if (fst->any_timer_armed[is_write]) {
> +        return true;
> +    } else {
> +        return throttle_schedule_timer(&fst->ts, &fst->tt, is_write);
> +    }
> +}
> +
> +static void fsdev_throttle_schedule_next_request(FsThrottle *fst, bool is_write)
> +{
> +    bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);

Let's add a newline here.

> +    if (!fst->pending_reqs[is_write]) {
> +        return;
> +    }
> +    if (!must_wait) {
> +        if (qemu_in_coroutine() &&
> +            qemu_co_queue_next(&fst->throttled_reqs[is_write])) {
> +            ;
> +       } else {
> +           int64_t now = qemu_clock_get_ns(fst->tt.clock_type);
> +           timer_mod(fst->tt.timers[is_write], now + 1);
> +           fst->any_timer_armed[is_write] = true;
> +       }
> +   }
> +}
> +
> +static void fsdev_throttle_timer_cb(FsThrottle *fst, bool is_write)
> +{
> +    bool empty_queue;

Same here.

> +    qemu_mutex_lock(&fst->lock);
> +    fst->any_timer_armed[is_write] = false;
> +    qemu_mutex_unlock(&fst->lock);
> +    empty_queue = !qemu_co_enter_next(&fst->throttled_reqs[is_write]);
> +    if (empty_queue) {
> +        qemu_mutex_lock(&fst->lock);
> +        fsdev_throttle_schedule_next_request(fst, is_write);
> +        qemu_mutex_unlock(&fst->lock);
> +    }
> +}
> +
> +bool fsdev_throttle_enabled(FsThrottle *fst)
> +{
> +    return fst->enabled;
> +}
> +
> +static void fsdev_iolimits_enable(FsThrottle *fst)
> +{
> +    fst->enabled = true;
> +}
> +
> +static void fsdev_iolimits_disable(FsThrottle *fst)
> +{
> +    fst->enabled = false;
> +}
> +

fsdev_throttle_configure_iolimits() is the only function that will ever
write to fst->enabled. The helpers seem to indicate the opposite. It is
better to open code in fsdev_throttle_configure_iolimits().

> +static void fsdev_throttle_read_timer_cb(void *opaque)
> +{
> +    fsdev_throttle_timer_cb(opaque, false);
> +}
> +
> +static void fsdev_throttle_write_timer_cb(void *opaque)
> +{
> +    fsdev_throttle_timer_cb(opaque, true);
> +}
> +
> +static void fsdev_parse_io_limit_opts(QemuOpts *opts, FsThrottle *fst)
> +{
> +    fst->cfg.buckets[THROTTLE_BPS_TOTAL].avg =
> +          qemu_opt_get_number(opts, "throttling.bps-total", 0);
> +    fst->cfg.buckets[THROTTLE_BPS_READ].avg  =
> +          qemu_opt_get_number(opts, "throttling.bps-read", 0);
> +    fst->cfg.buckets[THROTTLE_BPS_WRITE].avg =
> +          qemu_opt_get_number(opts, "throttling.bps-write", 0);
> +    fst->cfg.buckets[THROTTLE_OPS_TOTAL].avg =
> +          qemu_opt_get_number(opts, "throttling.iops-total", 0);
> +    fst->cfg.buckets[THROTTLE_OPS_READ].avg =
> +          qemu_opt_get_number(opts, "throttling.iops-read", 0);
> +    fst->cfg.buckets[THROTTLE_OPS_WRITE].avg =
> +          qemu_opt_get_number(opts, "throttling.iops-write", 0);
> +
> +    fst->cfg.buckets[THROTTLE_BPS_TOTAL].max =
> +          qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
> +    fst->cfg.buckets[THROTTLE_BPS_READ].max  =
> +          qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
> +    fst->cfg.buckets[THROTTLE_BPS_WRITE].max =
> +          qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
> +    fst->cfg.buckets[THROTTLE_OPS_TOTAL].max =
> +          qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
> +    fst->cfg.buckets[THROTTLE_OPS_READ].max =
> +          qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
> +    fst->cfg.buckets[THROTTLE_OPS_WRITE].max =
> +          qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
> +
> +    fst->cfg.buckets[THROTTLE_BPS_TOTAL].burst_length =
> +          qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
> +    fst->cfg.buckets[THROTTLE_BPS_READ].burst_length  =
> +          qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
> +    fst->cfg.buckets[THROTTLE_BPS_WRITE].burst_length =
> +          qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
> +    fst->cfg.buckets[THROTTLE_OPS_TOTAL].burst_length =
> +          qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
> +    fst->cfg.buckets[THROTTLE_OPS_READ].burst_length =
> +          qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
> +    fst->cfg.buckets[THROTTLE_OPS_WRITE].burst_length =
> +          qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
> +    fst->cfg.op_size =
> +          qemu_opt_get_number(opts, "throttling.iops-size", 0);
> +}
> +
> +int fsdev_throttle_configure_iolimits(QemuOpts *opts, FsThrottle *fst)
> +{
> +    Error *err = NULL;
> +
> +    /* Parse and set populate the cfg structure */
> +    fsdev_parse_io_limit_opts(opts, fst);
> +
> +    if (!throttle_is_valid(&fst->cfg, &err)) {
> +        error_report("Throttle configuration is not valid");

throttle_is_valid() already populates err with a meaningful message and
you should use it:

        error_reportf_err(err, "Throttle configuration is not valid: ");

> +        return -1;
> +    }
> +    if (throttle_enabled(&fst->cfg)) {
> +        fst->aioctx = qemu_get_aio_context();
> +        if (!fst->aioctx) {
> +            error_report("Failed to create AIO Context");
> +            return -1;
> +        }

qemu_get_aio_context() returns qemu_aio_context which is initialized in
qemu_init_main_loop(). It is guarantied to be non NULL and if it is, we'd
better dump core.

> +        throttle_init(&fst->ts);
> +        throttle_timers_init(&fst->tt,
> +                             fst->aioctx,
> +                             QEMU_CLOCK_REALTIME,
> +                             fsdev_throttle_read_timer_cb,
> +                             fsdev_throttle_write_timer_cb,
> +                             fst);
> +        throttle_config(&fst->ts, &fst->tt, &fst->cfg);
> +        qemu_co_queue_init(&fst->throttled_reqs[0]);
> +        qemu_co_queue_init(&fst->throttled_reqs[1]);
> +        fst->pending_reqs[0] = 0;
> +        fst->pending_reqs[1] = 0;
> +        fst->any_timer_armed[0] = false;
> +        fst->any_timer_armed[1] = false;
> +        qemu_mutex_init(&fst->lock);
> +        fsdev_iolimits_enable(fst);
> +   } else {
> +        fsdev_iolimits_disable(fst);
> +   }
> +   return 0;
> +}
> +
> +void fsdev_throttle_request(FsThrottle *fst, bool is_write, ssize_t bytes)
> +{
> +    if (fsdev_throttle_enabled(fst)) {
> +        qemu_mutex_lock(&fst->lock);
> +        bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);

Even if the QEMU code is supposed to follow C99, I think it is prettier to
declare variables at the beginning of the code block.

           bool must_wait;

           qemu_mutex_lock(&fst->lock);
           must_wait = fsdev_throttle_check_for_wait(fst, is_write);

> +        if (must_wait || fst->pending_reqs[is_write]) {
> +            fst->pending_reqs[is_write]++;
> +            qemu_mutex_unlock(&fst->lock);
> +            qemu_co_queue_wait(&fst->throttled_reqs[is_write]);
> +            qemu_mutex_lock(&fst->lock);
> +            fst->pending_reqs[is_write]--;
> +       }
> +       throttle_account(&fst->ts, is_write, bytes);
> +       fsdev_throttle_schedule_next_request(fst, is_write);
> +       qemu_mutex_unlock(&fst->lock);
> +    }
> +}
> +
> +void fsdev_throttle_cleanup(FsThrottle *fst)
> +{
> +    throttle_timers_destroy(&fst->tt);
> +    qemu_mutex_destroy(&fst->lock);
> +}
> diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h
> new file mode 100644
> index 0000000..73dd367
> --- /dev/null
> +++ b/fsdev/qemu-fsdev-throttle.h
> @@ -0,0 +1,42 @@
> +/*
> + * Fsdev Throttle
> + *
> + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
> + *
> + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * (at your option) any later version.
> + *
> + * See the COPYING file in the top-level directory for details.
> + *
> + */
> +
> +#ifndef _FSDEV_THROTTLE_H
> +#define _FSDEV_THROTTLE_H
> +
> +#include "block/aio.h"
> +#include "qemu/main-loop.h"
> +#include "qemu/coroutine.h"
> +#include "qemu/throttle.h"
> +
> +typedef struct FsThrottle {
> +    ThrottleState ts;
> +    ThrottleTimers tt;
> +    AioContext   *aioctx;
> +    ThrottleConfig cfg;
> +    bool enabled;
> +    CoQueue      throttled_reqs[2];
> +    unsigned     pending_reqs[2];
> +    bool any_timer_armed[2];
> +    QemuMutex lock;
> +} FsThrottle;
> +
> +bool fsdev_throttle_enabled(FsThrottle *);
> +
> +int fsdev_throttle_configure_iolimits(QemuOpts *, FsThrottle *);
> +
> +void fsdev_throttle_request(FsThrottle *, bool , ssize_t);
> +
> +void fsdev_throttle_cleanup(FsThrottle *);
> +#endif /* _FSDEV_THROTTLE_H */
> diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
> index 3f271fc..b1009fc 100644
> --- a/hw/9pfs/9p-local.c
> +++ b/hw/9pfs/9p-local.c
> @@ -436,8 +436,8 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
>                               const struct iovec *iov,
>                               int iovcnt, off_t offset)
>  {
> -    ssize_t ret
> -;
> +    ssize_t ret;
> +
>  #ifdef CONFIG_PREADV
>      ret = pwritev(fs->fd, iov, iovcnt, offset);
>  #else
> @@ -1213,6 +1213,8 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>      const char *sec_model = qemu_opt_get(opts, "security_model");
>      const char *path = qemu_opt_get(opts, "path");
>  
> +    FsThrottle *fst = &fse->fst;
> +
>      if (!sec_model) {
>          error_report("Security model not specified, local fs needs security model");
>          error_printf("valid options are:"
> @@ -1240,6 +1242,11 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>          error_report("fsdev: No path specified");
>          return -1;
>      }
> +
> +    if (fsdev_throttle_configure_iolimits(opts, fst) < 0) {
> +        return -1;
> +    }
> +
>      fse->path = g_strdup(path);
>  
>      return 0;
> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> index dfe293d..3748ba9 100644
> --- a/hw/9pfs/9p.c
> +++ b/hw/9pfs/9p.c
> @@ -3490,6 +3490,10 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp)
>          error_setg(errp, "share path %s is not a directory", fse->path);
>          goto out;
>      }
> +
> +    /* Throttle structure initialization */

I'm still thinking this comment paraphrases the code :)

> +    s->ctx.fst = &fse->fst;
> +
>      v9fs_path_free(&path);
>  
>      rc = 0;
> @@ -3504,6 +3508,9 @@ out:
>  
>  void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
>  {
> +    if (fsdev_throttle_enabled(s->ctx.fst)) {
> +        fsdev_throttle_cleanup(s->ctx.fst);
> +    }
>      g_free(s->ctx.fs_root);
>      g_free(s->tag);
>  }
> diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
> index 10343c0..65897d5 100644
> --- a/hw/9pfs/cofile.c
> +++ b/hw/9pfs/cofile.c
> @@ -16,6 +16,7 @@
>  #include "qemu/thread.h"
>  #include "qemu/coroutine.h"
>  #include "coth.h"
> +#include "fsdev/qemu-fsdev-throttle.h"
>  
>  int v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t st_mode,
>                     V9fsStatDotl *v9stat)
> @@ -242,6 +243,7 @@ int v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp,
>      int err;
>      V9fsState *s = pdu->s;
>  
> +    fsdev_throttle_request(s->ctx.fst, true, iov->iov_len);

I think it would make more sense to move this after the cancellation point.

>      if (v9fs_request_cancelled(pdu)) {
>          return -EINTR;
>      }
> @@ -261,6 +263,7 @@ int v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp,
>      int err;
>      V9fsState *s = pdu->s;
>  
> +    fsdev_throttle_request(s->ctx.fst, false, iov->iov_len);

Same here.

>      if (v9fs_request_cancelled(pdu)) {
>          return -EINTR;
>      }

Cheers.

--
Greg
Pradeep Jagadeesh Sept. 21, 2016, 10:22 a.m. UTC | #5
Hi Greg,

Thanks for having a look at patchset.
See the replies below.

> On Fri, 16 Sep 2016 04:33:36 -0400
> Pradeep Jagadeesh <pradeepkiruvale@gmail.com> wrote:
>
>> Uses throttling APIs to limit I/O bandwidth and number of operations on the
>> fsdev devices.
>>
>> Signed-off-by: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
>> ---
>
> Hi Pradeep,
>
> Please find some comments below.
>
>>  fsdev/Makefile.objs         |   1 +
>>  fsdev/file-op-9p.h          |   3 +
>>  fsdev/qemu-fsdev-opts.c     |  76 ++++++++++++++++++
>>  fsdev/qemu-fsdev-throttle.c | 191 ++++++++++++++++++++++++++++++++++++++++++++
>>  fsdev/qemu-fsdev-throttle.h |  42 ++++++++++
>
> Is the qemu- prefix really needed ?
Just followed the previous file naming convention,
like qemu-fsdev-opts.c
>
>>  hw/9pfs/9p-local.c          |  11 ++-
>>  hw/9pfs/9p.c                |   7 ++
>>  hw/9pfs/cofile.c            |   3 +
>>  8 files changed, 332 insertions(+), 2 deletions(-)
>>  create mode 100644 fsdev/qemu-fsdev-throttle.c
>>  create mode 100644 fsdev/qemu-fsdev-throttle.h
>>
>> This adds the support for the 9p-local driver.
>> For now this functionality can be enabled only through qemu cli options.
>> QMP interface and support to other drivers need further extensions.
>> To make it simple for other drivers, the throttle code has been put in
>> separate files.
>>
>> v1 -> v2:
>>
>> -Fixed FsContext redeclaration issue
>> -Removed couple of function declarations from 9p-throttle.h
>> -Fixed some of the .help messages
>>
>> v2 -> v3:
>>
>> -Addressed follwing comments by Claudio Fontana
>>  -Removed redundant memset calls in fsdev_throttle_configure_iolimits function
>>  -Checking throttle structure validity before initializing other structures
>>   in fsdev_throttle_configure_iolimits
>>
>> -Addressed following comments by Greg Kurz
>>  -Moved the code from 9pfs directory to fsdev directory, because the throttling
>>   is for the fsdev devices.Renamed the files and functions to fsdev_ from 9pfs_
>>  -Renamed throttling cli options to throttling.*, as in QMP cli options
>>  -Removed some of the unwanted .h files from qemu-fsdev-throttle.[ch]
>>  -Using throttle_enabled() function to set the thottle enabled flag for fsdev.
>>
>> diff --git a/fsdev/Makefile.objs b/fsdev/Makefile.objs
>> index 1b120a4..2c6da2d 100644
>> --- a/fsdev/Makefile.objs
>> +++ b/fsdev/Makefile.objs
>> @@ -7,6 +7,7 @@ common-obj-y = qemu-fsdev-dummy.o
>>  endif
>>  common-obj-y += qemu-fsdev-opts.o
>>
>> +common-obj-y += qemu-fsdev-throttle.o
>>  # Toplevel always builds this; targets without virtio will put it in
>>  # common-obj-y
>>  common-obj-$(CONFIG_ALL) += qemu-fsdev-dummy.o
>> diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
>> index 6db9fea..33fe822 100644
>> --- a/fsdev/file-op-9p.h
>> +++ b/fsdev/file-op-9p.h
>> @@ -17,6 +17,7 @@
>>  #include <dirent.h>
>>  #include <utime.h>
>>  #include <sys/vfs.h>
>> +#include "qemu-fsdev-throttle.h"
>>
>>  #define SM_LOCAL_MODE_BITS    0600
>>  #define SM_LOCAL_DIR_MODE_BITS    0700
>> @@ -74,6 +75,7 @@ typedef struct FsDriverEntry {
>>      char *path;
>>      int export_flags;
>>      FileOperations *ops;
>> +    FsThrottle fst;
>>  } FsDriverEntry;
>>
>>  typedef struct FsContext
>> @@ -83,6 +85,7 @@ typedef struct FsContext
>>      int export_flags;
>>      struct xattr_operations **xops;
>>      struct extended_ops exops;
>> +    FsThrottle *fst;
>>      /* fs driver specific data */
>>      void *private;
>>  } FsContext;
>> diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c
>> index 1dd8c7a..395d497 100644
>> --- a/fsdev/qemu-fsdev-opts.c
>> +++ b/fsdev/qemu-fsdev-opts.c
>> @@ -37,6 +37,82 @@ static QemuOptsList qemu_fsdev_opts = {
>>          }, {
>>              .name = "sock_fd",
>>              .type = QEMU_OPT_NUMBER,
>> +        }, {
>> +            .name = "throttling.iops-total",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit total I/O operations per second",
>> +        },{
>> +            .name = "throttling.iops-read",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit read operations per second",
>> +        },{
>> +            .name = "throttling.iops-write",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit write operations per second",
>> +        },{
>> +            .name = "throttling.bps-total",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit total bytes per second",
>> +        },{
>> +            .name = "throttling.bps-read",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit read bytes per second",
>> +        },{
>> +            .name = "throttling.bps-write",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit write bytes per second",
>> +        },{
>> +            .name = "throttling.iops-total-max",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "I/O operations burst",
>> +        },{
>> +            .name = "throttling.iops-read-max",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "I/O operations read burst",
>> +        },{
>> +            .name = "throttling.iops-write-max",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "I/O operations write burst",
>> +        },{
>> +            .name = "throttling.bps-total-max",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "total bytes burst",
>> +        },{
>> +            .name = "throttling.bps-read-max",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "total bytes read burst",
>> +        },{
>> +            .name = "throttling.bps-write-max",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "total bytes write burst",
>> +        },{
>> +            .name = "throttling.iops-total-max-length",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "length of the iops-total-max burst period, in seconds",
>> +        },{
>> +            .name = "throttling.iops-read-max-length",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "length of the iops-read-max burst period, in seconds",
>> +        },{
>> +            .name = "throttling.iops-write-max-length",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "length of the iops-write-max burst period, in seconds",
>> +        },{
>> +            .name = "throttling.bps-total-max-length",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "length of the bps-total-max burst period, in seconds",
>> +        },{
>> +            .name = "throttling.bps-read-max-length",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "length of the bps-read-max burst period, in seconds",
>> +        },{
>> +            .name = "throttling.bps-write-max-length",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "length of the bps-write-max burst period, in seconds",
>> +        },{
>> +            .name = "throttling.iops-size",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "when limiting by iops max size of an I/O in bytes",
>>          },
>>
>>          { /*End of list */ }
>> diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c
>> new file mode 100644
>> index 0000000..0adccd1
>> --- /dev/null
>> +++ b/fsdev/qemu-fsdev-throttle.c
>> @@ -0,0 +1,191 @@
>> +/*
>> + * Fsdev Throttle
>> + *
>> + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
>> + *
>> + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> + * (at your option) any later version.
>> + *
>> + * See the COPYING file in the top-level directory for details.
>> + *
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/error-report.h"
>> +#include "qapi/error.h"
>> +#include "qemu-fsdev-throttle.h"
>> +
>> +static bool fsdev_throttle_check_for_wait(FsThrottle *fst, bool is_write)
>> +{
>> +    if (fst->any_timer_armed[is_write]) {
>> +        return true;
>> +    } else {
>> +        return throttle_schedule_timer(&fst->ts, &fst->tt, is_write);
>> +    }
>> +}
>> +
>> +static void fsdev_throttle_schedule_next_request(FsThrottle *fst, bool is_write)
>> +{
>> +    bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);
>
> Let's add a newline here.
Done!
>
>> +    if (!fst->pending_reqs[is_write]) {
>> +        return;
>> +    }
>> +    if (!must_wait) {
>> +        if (qemu_in_coroutine() &&
>> +            qemu_co_queue_next(&fst->throttled_reqs[is_write])) {
>> +            ;
>> +       } else {
>> +           int64_t now = qemu_clock_get_ns(fst->tt.clock_type);
>> +           timer_mod(fst->tt.timers[is_write], now + 1);
>> +           fst->any_timer_armed[is_write] = true;
>> +       }
>> +   }
>> +}
>> +
>> +static void fsdev_throttle_timer_cb(FsThrottle *fst, bool is_write)
>> +{
>> +    bool empty_queue;
>
> Same here.
Done!
>
>> +    qemu_mutex_lock(&fst->lock);
>> +    fst->any_timer_armed[is_write] = false;
>> +    qemu_mutex_unlock(&fst->lock);
>> +    empty_queue = !qemu_co_enter_next(&fst->throttled_reqs[is_write]);
>> +    if (empty_queue) {
>> +        qemu_mutex_lock(&fst->lock);
>> +        fsdev_throttle_schedule_next_request(fst, is_write);
>> +        qemu_mutex_unlock(&fst->lock);
>> +    }
>> +}
>> +
>> +bool fsdev_throttle_enabled(FsThrottle *fst)
>> +{
>> +    return fst->enabled;
>> +}
>> +
>> +static void fsdev_iolimits_enable(FsThrottle *fst)
>> +{
>> +    fst->enabled = true;
>> +}
>> +
>> +static void fsdev_iolimits_disable(FsThrottle *fst)
>> +{
>> +    fst->enabled = false;
>> +}
>> +
>
> fsdev_throttle_configure_iolimits() is the only function that will ever
> write to fst->enabled. The helpers seem to indicate the opposite. It is
> better to open code in fsdev_throttle_configure_iolimits().
You mean just use  fst->enabled = true; inside the 
fsdev_throttle_configure_iolimits().?
If that is the case, I thought these helpers are useful in future when 
we have qmp interfaces to enable,disable the throttle dynamically/run-time.
>
>> +static void fsdev_throttle_read_timer_cb(void *opaque)
>> +{
>> +    fsdev_throttle_timer_cb(opaque, false);
>> +}
>> +
>> +static void fsdev_throttle_write_timer_cb(void *opaque)
>> +{
>> +    fsdev_throttle_timer_cb(opaque, true);
>> +}
>> +
>> +static void fsdev_parse_io_limit_opts(QemuOpts *opts, FsThrottle *fst)
>> +{
>> +    fst->cfg.buckets[THROTTLE_BPS_TOTAL].avg =
>> +          qemu_opt_get_number(opts, "throttling.bps-total", 0);
>> +    fst->cfg.buckets[THROTTLE_BPS_READ].avg  =
>> +          qemu_opt_get_number(opts, "throttling.bps-read", 0);
>> +    fst->cfg.buckets[THROTTLE_BPS_WRITE].avg =
>> +          qemu_opt_get_number(opts, "throttling.bps-write", 0);
>> +    fst->cfg.buckets[THROTTLE_OPS_TOTAL].avg =
>> +          qemu_opt_get_number(opts, "throttling.iops-total", 0);
>> +    fst->cfg.buckets[THROTTLE_OPS_READ].avg =
>> +          qemu_opt_get_number(opts, "throttling.iops-read", 0);
>> +    fst->cfg.buckets[THROTTLE_OPS_WRITE].avg =
>> +          qemu_opt_get_number(opts, "throttling.iops-write", 0);
>> +
>> +    fst->cfg.buckets[THROTTLE_BPS_TOTAL].max =
>> +          qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
>> +    fst->cfg.buckets[THROTTLE_BPS_READ].max  =
>> +          qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
>> +    fst->cfg.buckets[THROTTLE_BPS_WRITE].max =
>> +          qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
>> +    fst->cfg.buckets[THROTTLE_OPS_TOTAL].max =
>> +          qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
>> +    fst->cfg.buckets[THROTTLE_OPS_READ].max =
>> +          qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
>> +    fst->cfg.buckets[THROTTLE_OPS_WRITE].max =
>> +          qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
>> +
>> +    fst->cfg.buckets[THROTTLE_BPS_TOTAL].burst_length =
>> +          qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
>> +    fst->cfg.buckets[THROTTLE_BPS_READ].burst_length  =
>> +          qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
>> +    fst->cfg.buckets[THROTTLE_BPS_WRITE].burst_length =
>> +          qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
>> +    fst->cfg.buckets[THROTTLE_OPS_TOTAL].burst_length =
>> +          qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
>> +    fst->cfg.buckets[THROTTLE_OPS_READ].burst_length =
>> +          qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
>> +    fst->cfg.buckets[THROTTLE_OPS_WRITE].burst_length =
>> +          qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
>> +    fst->cfg.op_size =
>> +          qemu_opt_get_number(opts, "throttling.iops-size", 0);
>> +}
>> +
>> +int fsdev_throttle_configure_iolimits(QemuOpts *opts, FsThrottle *fst)
>> +{
>> +    Error *err = NULL;
>> +
>> +    /* Parse and set populate the cfg structure */
>> +    fsdev_parse_io_limit_opts(opts, fst);
>> +
>> +    if (!throttle_is_valid(&fst->cfg, &err)) {
>> +        error_report("Throttle configuration is not valid");
>
> throttle_is_valid() already populates err with a meaningful message and
> you should use it:
>
>         error_reportf_err(err, "Throttle configuration is not valid: ");
>
OK,fixed.
>> +        return -1;
>> +    }
>> +    if (throttle_enabled(&fst->cfg)) {
>> +        fst->aioctx = qemu_get_aio_context();
>> +        if (!fst->aioctx) {
>> +            error_report("Failed to create AIO Context");
>> +            return -1;
>> +        }
>
> qemu_get_aio_context() returns qemu_aio_context which is initialized in
> qemu_init_main_loop(). It is guarantied to be non NULL and if it is, we'd
> better dump core.
>
OK, Is there any specific dump api that exists in qemu?
>> +        throttle_init(&fst->ts);
>> +        throttle_timers_init(&fst->tt,
>> +                             fst->aioctx,
>> +                             QEMU_CLOCK_REALTIME,
>> +                             fsdev_throttle_read_timer_cb,
>> +                             fsdev_throttle_write_timer_cb,
>> +                             fst);
>> +        throttle_config(&fst->ts, &fst->tt, &fst->cfg);
>> +        qemu_co_queue_init(&fst->throttled_reqs[0]);
>> +        qemu_co_queue_init(&fst->throttled_reqs[1]);
>> +        fst->pending_reqs[0] = 0;
>> +        fst->pending_reqs[1] = 0;
>> +        fst->any_timer_armed[0] = false;
>> +        fst->any_timer_armed[1] = false;
>> +        qemu_mutex_init(&fst->lock);
>> +        fsdev_iolimits_enable(fst);
>> +   } else {
>> +        fsdev_iolimits_disable(fst);
>> +   }
>> +   return 0;
>> +}
>> +
>> +void fsdev_throttle_request(FsThrottle *fst, bool is_write, ssize_t bytes)
>> +{
>> +    if (fsdev_throttle_enabled(fst)) {
>> +        qemu_mutex_lock(&fst->lock);
>> +        bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);
>
> Even if the QEMU code is supposed to follow C99, I think it is prettier to
> declare variables at the beginning of the code block.
>
>            bool must_wait;
OK,fixed.
>
>            qemu_mutex_lock(&fst->lock);
>            must_wait = fsdev_throttle_check_for_wait(fst, is_write);
>
>> +        if (must_wait || fst->pending_reqs[is_write]) {
>> +            fst->pending_reqs[is_write]++;
>> +            qemu_mutex_unlock(&fst->lock);
>> +            qemu_co_queue_wait(&fst->throttled_reqs[is_write]);
>> +            qemu_mutex_lock(&fst->lock);
>> +            fst->pending_reqs[is_write]--;
>> +       }
>> +       throttle_account(&fst->ts, is_write, bytes);
>> +       fsdev_throttle_schedule_next_request(fst, is_write);
>> +       qemu_mutex_unlock(&fst->lock);
>> +    }
>> +}
>> +
>> +void fsdev_throttle_cleanup(FsThrottle *fst)
>> +{
>> +    throttle_timers_destroy(&fst->tt);
>> +    qemu_mutex_destroy(&fst->lock);
>> +}
>> diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h
>> new file mode 100644
>> index 0000000..73dd367
>> --- /dev/null
>> +++ b/fsdev/qemu-fsdev-throttle.h
>> @@ -0,0 +1,42 @@
>> +/*
>> + * Fsdev Throttle
>> + *
>> + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
>> + *
>> + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> + * (at your option) any later version.
>> + *
>> + * See the COPYING file in the top-level directory for details.
>> + *
>> + */
>> +
>> +#ifndef _FSDEV_THROTTLE_H
>> +#define _FSDEV_THROTTLE_H
>> +
>> +#include "block/aio.h"
>> +#include "qemu/main-loop.h"
>> +#include "qemu/coroutine.h"
>> +#include "qemu/throttle.h"
>> +
>> +typedef struct FsThrottle {
>> +    ThrottleState ts;
>> +    ThrottleTimers tt;
>> +    AioContext   *aioctx;
>> +    ThrottleConfig cfg;
>> +    bool enabled;
>> +    CoQueue      throttled_reqs[2];
>> +    unsigned     pending_reqs[2];
>> +    bool any_timer_armed[2];
>> +    QemuMutex lock;
>> +} FsThrottle;
>> +
>> +bool fsdev_throttle_enabled(FsThrottle *);
>> +
>> +int fsdev_throttle_configure_iolimits(QemuOpts *, FsThrottle *);
>> +
>> +void fsdev_throttle_request(FsThrottle *, bool , ssize_t);
>> +
>> +void fsdev_throttle_cleanup(FsThrottle *);
>> +#endif /* _FSDEV_THROTTLE_H */
>> diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
>> index 3f271fc..b1009fc 100644
>> --- a/hw/9pfs/9p-local.c
>> +++ b/hw/9pfs/9p-local.c
>> @@ -436,8 +436,8 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
>>                               const struct iovec *iov,
>>                               int iovcnt, off_t offset)
>>  {
>> -    ssize_t ret
>> -;
>> +    ssize_t ret;
>> +
>>  #ifdef CONFIG_PREADV
>>      ret = pwritev(fs->fd, iov, iovcnt, offset);
>>  #else
>> @@ -1213,6 +1213,8 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>>      const char *sec_model = qemu_opt_get(opts, "security_model");
>>      const char *path = qemu_opt_get(opts, "path");
>>
>> +    FsThrottle *fst = &fse->fst;
>> +
>>      if (!sec_model) {
>>          error_report("Security model not specified, local fs needs security model");
>>          error_printf("valid options are:"
>> @@ -1240,6 +1242,11 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>>          error_report("fsdev: No path specified");
>>          return -1;
>>      }
>> +
>> +    if (fsdev_throttle_configure_iolimits(opts, fst) < 0) {
>> +        return -1;
>> +    }
>> +
>>      fse->path = g_strdup(path);
>>
>>      return 0;
>> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
>> index dfe293d..3748ba9 100644
>> --- a/hw/9pfs/9p.c
>> +++ b/hw/9pfs/9p.c
>> @@ -3490,6 +3490,10 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp)
>>          error_setg(errp, "share path %s is not a directory", fse->path);
>>          goto out;
>>      }
>> +
>> +    /* Throttle structure initialization */
>
> I'm still thinking this comment paraphrases the code :)
I missed it!:)
>
>> +    s->ctx.fst = &fse->fst;
>> +
>>      v9fs_path_free(&path);
>>
>>      rc = 0;
>> @@ -3504,6 +3508,9 @@ out:
>>
>>  void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
>>  {
>> +    if (fsdev_throttle_enabled(s->ctx.fst)) {
>> +        fsdev_throttle_cleanup(s->ctx.fst);
>> +    }
>>      g_free(s->ctx.fs_root);
>>      g_free(s->tag);
>>  }
>> diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
>> index 10343c0..65897d5 100644
>> --- a/hw/9pfs/cofile.c
>> +++ b/hw/9pfs/cofile.c
>> @@ -16,6 +16,7 @@
>>  #include "qemu/thread.h"
>>  #include "qemu/coroutine.h"
>>  #include "coth.h"
>> +#include "fsdev/qemu-fsdev-throttle.h"
>>
>>  int v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t st_mode,
>>                     V9fsStatDotl *v9stat)
>> @@ -242,6 +243,7 @@ int v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp,
>>      int err;
>>      V9fsState *s = pdu->s;
>>
>> +    fsdev_throttle_request(s->ctx.fst, true, iov->iov_len);
>
> I think it would make more sense to move this after the cancellation point.
Done!
>
>>      if (v9fs_request_cancelled(pdu)) {
>>          return -EINTR;
>>      }
>> @@ -261,6 +263,7 @@ int v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp,
>>      int err;
>>      V9fsState *s = pdu->s;
>>
>> +    fsdev_throttle_request(s->ctx.fst, false, iov->iov_len);
>
> Same here.
>
Done!
>>      if (v9fs_request_cancelled(pdu)) {
>>          return -EINTR;
>>      }
>
> Cheers.
>
> --
> Greg
>
Regards,
Pradeep
Greg Kurz Sept. 21, 2016, 12:10 p.m. UTC | #6
On Wed, 21 Sep 2016 12:22:25 +0200
Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com> wrote:

> Hi Greg,
> 
> Thanks for having a look at patchset.
> See the replies below.
> 
> > On Fri, 16 Sep 2016 04:33:36 -0400
> > Pradeep Jagadeesh <pradeepkiruvale@gmail.com> wrote:
> >  
> >> Uses throttling APIs to limit I/O bandwidth and number of operations on the
> >> fsdev devices.
> >>
> >> Signed-off-by: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
> >> ---  
> >
> > Hi Pradeep,
> >
> > Please find some comments below.
> >  
> >>  fsdev/Makefile.objs         |   1 +
> >>  fsdev/file-op-9p.h          |   3 +
> >>  fsdev/qemu-fsdev-opts.c     |  76 ++++++++++++++++++
> >>  fsdev/qemu-fsdev-throttle.c | 191 ++++++++++++++++++++++++++++++++++++++++++++
> >>  fsdev/qemu-fsdev-throttle.h |  42 ++++++++++  
> >
> > Is the qemu- prefix really needed ?  
> Just followed the previous file naming convention,
> like qemu-fsdev-opts.c
> >  
> >>  hw/9pfs/9p-local.c          |  11 ++-
> >>  hw/9pfs/9p.c                |   7 ++
> >>  hw/9pfs/cofile.c            |   3 +
> >>  8 files changed, 332 insertions(+), 2 deletions(-)
> >>  create mode 100644 fsdev/qemu-fsdev-throttle.c
> >>  create mode 100644 fsdev/qemu-fsdev-throttle.h
> >>
> >> This adds the support for the 9p-local driver.
> >> For now this functionality can be enabled only through qemu cli options.
> >> QMP interface and support to other drivers need further extensions.
> >> To make it simple for other drivers, the throttle code has been put in
> >> separate files.
> >>
> >> v1 -> v2:
> >>
> >> -Fixed FsContext redeclaration issue
> >> -Removed couple of function declarations from 9p-throttle.h
> >> -Fixed some of the .help messages
> >>
> >> v2 -> v3:
> >>
> >> -Addressed follwing comments by Claudio Fontana
> >>  -Removed redundant memset calls in fsdev_throttle_configure_iolimits function
> >>  -Checking throttle structure validity before initializing other structures
> >>   in fsdev_throttle_configure_iolimits
> >>
> >> -Addressed following comments by Greg Kurz
> >>  -Moved the code from 9pfs directory to fsdev directory, because the throttling
> >>   is for the fsdev devices.Renamed the files and functions to fsdev_ from 9pfs_
> >>  -Renamed throttling cli options to throttling.*, as in QMP cli options
> >>  -Removed some of the unwanted .h files from qemu-fsdev-throttle.[ch]
> >>  -Using throttle_enabled() function to set the thottle enabled flag for fsdev.
> >>
> >> diff --git a/fsdev/Makefile.objs b/fsdev/Makefile.objs
> >> index 1b120a4..2c6da2d 100644
> >> --- a/fsdev/Makefile.objs
> >> +++ b/fsdev/Makefile.objs
> >> @@ -7,6 +7,7 @@ common-obj-y = qemu-fsdev-dummy.o
> >>  endif
> >>  common-obj-y += qemu-fsdev-opts.o
> >>
> >> +common-obj-y += qemu-fsdev-throttle.o
> >>  # Toplevel always builds this; targets without virtio will put it in
> >>  # common-obj-y
> >>  common-obj-$(CONFIG_ALL) += qemu-fsdev-dummy.o
> >> diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
> >> index 6db9fea..33fe822 100644
> >> --- a/fsdev/file-op-9p.h
> >> +++ b/fsdev/file-op-9p.h
> >> @@ -17,6 +17,7 @@
> >>  #include <dirent.h>
> >>  #include <utime.h>
> >>  #include <sys/vfs.h>
> >> +#include "qemu-fsdev-throttle.h"
> >>
> >>  #define SM_LOCAL_MODE_BITS    0600
> >>  #define SM_LOCAL_DIR_MODE_BITS    0700
> >> @@ -74,6 +75,7 @@ typedef struct FsDriverEntry {
> >>      char *path;
> >>      int export_flags;
> >>      FileOperations *ops;
> >> +    FsThrottle fst;
> >>  } FsDriverEntry;
> >>
> >>  typedef struct FsContext
> >> @@ -83,6 +85,7 @@ typedef struct FsContext
> >>      int export_flags;
> >>      struct xattr_operations **xops;
> >>      struct extended_ops exops;
> >> +    FsThrottle *fst;
> >>      /* fs driver specific data */
> >>      void *private;
> >>  } FsContext;
> >> diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c
> >> index 1dd8c7a..395d497 100644
> >> --- a/fsdev/qemu-fsdev-opts.c
> >> +++ b/fsdev/qemu-fsdev-opts.c
> >> @@ -37,6 +37,82 @@ static QemuOptsList qemu_fsdev_opts = {
> >>          }, {
> >>              .name = "sock_fd",
> >>              .type = QEMU_OPT_NUMBER,
> >> +        }, {
> >> +            .name = "throttling.iops-total",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "limit total I/O operations per second",
> >> +        },{
> >> +            .name = "throttling.iops-read",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "limit read operations per second",
> >> +        },{
> >> +            .name = "throttling.iops-write",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "limit write operations per second",
> >> +        },{
> >> +            .name = "throttling.bps-total",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "limit total bytes per second",
> >> +        },{
> >> +            .name = "throttling.bps-read",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "limit read bytes per second",
> >> +        },{
> >> +            .name = "throttling.bps-write",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "limit write bytes per second",
> >> +        },{
> >> +            .name = "throttling.iops-total-max",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "I/O operations burst",
> >> +        },{
> >> +            .name = "throttling.iops-read-max",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "I/O operations read burst",
> >> +        },{
> >> +            .name = "throttling.iops-write-max",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "I/O operations write burst",
> >> +        },{
> >> +            .name = "throttling.bps-total-max",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "total bytes burst",
> >> +        },{
> >> +            .name = "throttling.bps-read-max",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "total bytes read burst",
> >> +        },{
> >> +            .name = "throttling.bps-write-max",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "total bytes write burst",
> >> +        },{
> >> +            .name = "throttling.iops-total-max-length",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "length of the iops-total-max burst period, in seconds",
> >> +        },{
> >> +            .name = "throttling.iops-read-max-length",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "length of the iops-read-max burst period, in seconds",
> >> +        },{
> >> +            .name = "throttling.iops-write-max-length",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "length of the iops-write-max burst period, in seconds",
> >> +        },{
> >> +            .name = "throttling.bps-total-max-length",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "length of the bps-total-max burst period, in seconds",
> >> +        },{
> >> +            .name = "throttling.bps-read-max-length",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "length of the bps-read-max burst period, in seconds",
> >> +        },{
> >> +            .name = "throttling.bps-write-max-length",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "length of the bps-write-max burst period, in seconds",
> >> +        },{
> >> +            .name = "throttling.iops-size",
> >> +            .type = QEMU_OPT_NUMBER,
> >> +            .help = "when limiting by iops max size of an I/O in bytes",
> >>          },
> >>
> >>          { /*End of list */ }
> >> diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c
> >> new file mode 100644
> >> index 0000000..0adccd1
> >> --- /dev/null
> >> +++ b/fsdev/qemu-fsdev-throttle.c
> >> @@ -0,0 +1,191 @@
> >> +/*
> >> + * Fsdev Throttle
> >> + *
> >> + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
> >> + *
> >> + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
> >> + *
> >> + * This work is licensed under the terms of the GNU GPL, version 2 or
> >> + * (at your option) any later version.
> >> + *
> >> + * See the COPYING file in the top-level directory for details.
> >> + *
> >> + */
> >> +
> >> +#include "qemu/osdep.h"
> >> +#include "qemu/error-report.h"
> >> +#include "qapi/error.h"
> >> +#include "qemu-fsdev-throttle.h"
> >> +
> >> +static bool fsdev_throttle_check_for_wait(FsThrottle *fst, bool is_write)
> >> +{
> >> +    if (fst->any_timer_armed[is_write]) {
> >> +        return true;
> >> +    } else {
> >> +        return throttle_schedule_timer(&fst->ts, &fst->tt, is_write);
> >> +    }
> >> +}
> >> +
> >> +static void fsdev_throttle_schedule_next_request(FsThrottle *fst, bool is_write)
> >> +{
> >> +    bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);  
> >
> > Let's add a newline here.  
> Done!
> >  
> >> +    if (!fst->pending_reqs[is_write]) {
> >> +        return;
> >> +    }
> >> +    if (!must_wait) {
> >> +        if (qemu_in_coroutine() &&
> >> +            qemu_co_queue_next(&fst->throttled_reqs[is_write])) {
> >> +            ;
> >> +       } else {
> >> +           int64_t now = qemu_clock_get_ns(fst->tt.clock_type);
> >> +           timer_mod(fst->tt.timers[is_write], now + 1);
> >> +           fst->any_timer_armed[is_write] = true;
> >> +       }
> >> +   }
> >> +}
> >> +
> >> +static void fsdev_throttle_timer_cb(FsThrottle *fst, bool is_write)
> >> +{
> >> +    bool empty_queue;  
> >
> > Same here.  
> Done!
> >  
> >> +    qemu_mutex_lock(&fst->lock);
> >> +    fst->any_timer_armed[is_write] = false;
> >> +    qemu_mutex_unlock(&fst->lock);
> >> +    empty_queue = !qemu_co_enter_next(&fst->throttled_reqs[is_write]);
> >> +    if (empty_queue) {
> >> +        qemu_mutex_lock(&fst->lock);
> >> +        fsdev_throttle_schedule_next_request(fst, is_write);
> >> +        qemu_mutex_unlock(&fst->lock);
> >> +    }
> >> +}
> >> +
> >> +bool fsdev_throttle_enabled(FsThrottle *fst)
> >> +{
> >> +    return fst->enabled;
> >> +}
> >> +
> >> +static void fsdev_iolimits_enable(FsThrottle *fst)
> >> +{
> >> +    fst->enabled = true;
> >> +}
> >> +
> >> +static void fsdev_iolimits_disable(FsThrottle *fst)
> >> +{
> >> +    fst->enabled = false;
> >> +}
> >> +  
> >
> > fsdev_throttle_configure_iolimits() is the only function that will ever
> > write to fst->enabled. The helpers seem to indicate the opposite. It is
> > better to open code in fsdev_throttle_configure_iolimits().  
> You mean just use  fst->enabled = true; inside the 
> fsdev_throttle_configure_iolimits().?
> If that is the case, I thought these helpers are useful in future when 
> we have qmp interfaces to enable,disable the throttle dynamically/run-time.

Hmm... in this case, something looks wrong.

If we start with a valid throttling config, then disable it with QMP and then
unrealize the device, we won't do the cleanup... fst->enabled looks like
a premature optimization which causes confusion actually.

I'd prefer fsdev to behave like blockdev, which doesn't have an enabled flag.

Please drop it completely. :)

> >  
> >> +static void fsdev_throttle_read_timer_cb(void *opaque)
> >> +{
> >> +    fsdev_throttle_timer_cb(opaque, false);
> >> +}
> >> +
> >> +static void fsdev_throttle_write_timer_cb(void *opaque)
> >> +{
> >> +    fsdev_throttle_timer_cb(opaque, true);
> >> +}
> >> +
> >> +static void fsdev_parse_io_limit_opts(QemuOpts *opts, FsThrottle *fst)
> >> +{
> >> +    fst->cfg.buckets[THROTTLE_BPS_TOTAL].avg =
> >> +          qemu_opt_get_number(opts, "throttling.bps-total", 0);
> >> +    fst->cfg.buckets[THROTTLE_BPS_READ].avg  =
> >> +          qemu_opt_get_number(opts, "throttling.bps-read", 0);
> >> +    fst->cfg.buckets[THROTTLE_BPS_WRITE].avg =
> >> +          qemu_opt_get_number(opts, "throttling.bps-write", 0);
> >> +    fst->cfg.buckets[THROTTLE_OPS_TOTAL].avg =
> >> +          qemu_opt_get_number(opts, "throttling.iops-total", 0);
> >> +    fst->cfg.buckets[THROTTLE_OPS_READ].avg =
> >> +          qemu_opt_get_number(opts, "throttling.iops-read", 0);
> >> +    fst->cfg.buckets[THROTTLE_OPS_WRITE].avg =
> >> +          qemu_opt_get_number(opts, "throttling.iops-write", 0);
> >> +
> >> +    fst->cfg.buckets[THROTTLE_BPS_TOTAL].max =
> >> +          qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
> >> +    fst->cfg.buckets[THROTTLE_BPS_READ].max  =
> >> +          qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
> >> +    fst->cfg.buckets[THROTTLE_BPS_WRITE].max =
> >> +          qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
> >> +    fst->cfg.buckets[THROTTLE_OPS_TOTAL].max =
> >> +          qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
> >> +    fst->cfg.buckets[THROTTLE_OPS_READ].max =
> >> +          qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
> >> +    fst->cfg.buckets[THROTTLE_OPS_WRITE].max =
> >> +          qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
> >> +
> >> +    fst->cfg.buckets[THROTTLE_BPS_TOTAL].burst_length =
> >> +          qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
> >> +    fst->cfg.buckets[THROTTLE_BPS_READ].burst_length  =
> >> +          qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
> >> +    fst->cfg.buckets[THROTTLE_BPS_WRITE].burst_length =
> >> +          qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
> >> +    fst->cfg.buckets[THROTTLE_OPS_TOTAL].burst_length =
> >> +          qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
> >> +    fst->cfg.buckets[THROTTLE_OPS_READ].burst_length =
> >> +          qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
> >> +    fst->cfg.buckets[THROTTLE_OPS_WRITE].burst_length =
> >> +          qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
> >> +    fst->cfg.op_size =
> >> +          qemu_opt_get_number(opts, "throttling.iops-size", 0);
> >> +}
> >> +
> >> +int fsdev_throttle_configure_iolimits(QemuOpts *opts, FsThrottle *fst)
> >> +{
> >> +    Error *err = NULL;
> >> +
> >> +    /* Parse and set populate the cfg structure */
> >> +    fsdev_parse_io_limit_opts(opts, fst);
> >> +
> >> +    if (!throttle_is_valid(&fst->cfg, &err)) {
> >> +        error_report("Throttle configuration is not valid");  
> >
> > throttle_is_valid() already populates err with a meaningful message and
> > you should use it:
> >
> >         error_reportf_err(err, "Throttle configuration is not valid: ");
> >  
> OK,fixed.
> >> +        return -1;
> >> +    }
> >> +    if (throttle_enabled(&fst->cfg)) {
> >> +        fst->aioctx = qemu_get_aio_context();
> >> +        if (!fst->aioctx) {
> >> +            error_report("Failed to create AIO Context");
> >> +            return -1;
> >> +        }  
> >
> > qemu_get_aio_context() returns qemu_aio_context which is initialized in
> > qemu_init_main_loop(). It is guarantied to be non NULL and if it is, we'd
> > better dump core.
> >  
> OK, Is there any specific dump api that exists in qemu?

QEMU uses g_assert() for elaborate checks but you really don't need to check
the return value of qemu_get_aio_context(). Just grep through the code to
be convinced :)

> >> +        throttle_init(&fst->ts);
> >> +        throttle_timers_init(&fst->tt,
> >> +                             fst->aioctx,
> >> +                             QEMU_CLOCK_REALTIME,
> >> +                             fsdev_throttle_read_timer_cb,
> >> +                             fsdev_throttle_write_timer_cb,
> >> +                             fst);
> >> +        throttle_config(&fst->ts, &fst->tt, &fst->cfg);
> >> +        qemu_co_queue_init(&fst->throttled_reqs[0]);
> >> +        qemu_co_queue_init(&fst->throttled_reqs[1]);
> >> +        fst->pending_reqs[0] = 0;
> >> +        fst->pending_reqs[1] = 0;
> >> +        fst->any_timer_armed[0] = false;
> >> +        fst->any_timer_armed[1] = false;
> >> +        qemu_mutex_init(&fst->lock);
> >> +        fsdev_iolimits_enable(fst);
> >> +   } else {
> >> +        fsdev_iolimits_disable(fst);
> >> +   }
> >> +   return 0;
> >> +}
> >> +
> >> +void fsdev_throttle_request(FsThrottle *fst, bool is_write, ssize_t bytes)
> >> +{
> >> +    if (fsdev_throttle_enabled(fst)) {
> >> +        qemu_mutex_lock(&fst->lock);
> >> +        bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);  
> >
> > Even if the QEMU code is supposed to follow C99, I think it is prettier to
> > declare variables at the beginning of the code block.
> >
> >            bool must_wait;  
> OK,fixed.
> >
> >            qemu_mutex_lock(&fst->lock);
> >            must_wait = fsdev_throttle_check_for_wait(fst, is_write);
> >  
> >> +        if (must_wait || fst->pending_reqs[is_write]) {
> >> +            fst->pending_reqs[is_write]++;
> >> +            qemu_mutex_unlock(&fst->lock);
> >> +            qemu_co_queue_wait(&fst->throttled_reqs[is_write]);
> >> +            qemu_mutex_lock(&fst->lock);
> >> +            fst->pending_reqs[is_write]--;
> >> +       }
> >> +       throttle_account(&fst->ts, is_write, bytes);
> >> +       fsdev_throttle_schedule_next_request(fst, is_write);
> >> +       qemu_mutex_unlock(&fst->lock);
> >> +    }
> >> +}
> >> +
> >> +void fsdev_throttle_cleanup(FsThrottle *fst)
> >> +{
> >> +    throttle_timers_destroy(&fst->tt);
> >> +    qemu_mutex_destroy(&fst->lock);
> >> +}
> >> diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h
> >> new file mode 100644
> >> index 0000000..73dd367
> >> --- /dev/null
> >> +++ b/fsdev/qemu-fsdev-throttle.h
> >> @@ -0,0 +1,42 @@
> >> +/*
> >> + * Fsdev Throttle
> >> + *
> >> + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
> >> + *
> >> + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
> >> + *
> >> + * This work is licensed under the terms of the GNU GPL, version 2 or
> >> + * (at your option) any later version.
> >> + *
> >> + * See the COPYING file in the top-level directory for details.
> >> + *
> >> + */
> >> +
> >> +#ifndef _FSDEV_THROTTLE_H
> >> +#define _FSDEV_THROTTLE_H
> >> +
> >> +#include "block/aio.h"
> >> +#include "qemu/main-loop.h"
> >> +#include "qemu/coroutine.h"
> >> +#include "qemu/throttle.h"
> >> +
> >> +typedef struct FsThrottle {
> >> +    ThrottleState ts;
> >> +    ThrottleTimers tt;
> >> +    AioContext   *aioctx;
> >> +    ThrottleConfig cfg;
> >> +    bool enabled;
> >> +    CoQueue      throttled_reqs[2];
> >> +    unsigned     pending_reqs[2];
> >> +    bool any_timer_armed[2];
> >> +    QemuMutex lock;
> >> +} FsThrottle;
> >> +
> >> +bool fsdev_throttle_enabled(FsThrottle *);
> >> +
> >> +int fsdev_throttle_configure_iolimits(QemuOpts *, FsThrottle *);
> >> +
> >> +void fsdev_throttle_request(FsThrottle *, bool , ssize_t);
> >> +
> >> +void fsdev_throttle_cleanup(FsThrottle *);
> >> +#endif /* _FSDEV_THROTTLE_H */
> >> diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
> >> index 3f271fc..b1009fc 100644
> >> --- a/hw/9pfs/9p-local.c
> >> +++ b/hw/9pfs/9p-local.c
> >> @@ -436,8 +436,8 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
> >>                               const struct iovec *iov,
> >>                               int iovcnt, off_t offset)
> >>  {
> >> -    ssize_t ret
> >> -;
> >> +    ssize_t ret;
> >> +
> >>  #ifdef CONFIG_PREADV
> >>      ret = pwritev(fs->fd, iov, iovcnt, offset);
> >>  #else
> >> @@ -1213,6 +1213,8 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
> >>      const char *sec_model = qemu_opt_get(opts, "security_model");
> >>      const char *path = qemu_opt_get(opts, "path");
> >>
> >> +    FsThrottle *fst = &fse->fst;
> >> +
> >>      if (!sec_model) {
> >>          error_report("Security model not specified, local fs needs security model");
> >>          error_printf("valid options are:"
> >> @@ -1240,6 +1242,11 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
> >>          error_report("fsdev: No path specified");
> >>          return -1;
> >>      }
> >> +
> >> +    if (fsdev_throttle_configure_iolimits(opts, fst) < 0) {
> >> +        return -1;
> >> +    }
> >> +
> >>      fse->path = g_strdup(path);
> >>
> >>      return 0;
> >> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> >> index dfe293d..3748ba9 100644
> >> --- a/hw/9pfs/9p.c
> >> +++ b/hw/9pfs/9p.c
> >> @@ -3490,6 +3490,10 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp)
> >>          error_setg(errp, "share path %s is not a directory", fse->path);
> >>          goto out;
> >>      }
> >> +
> >> +    /* Throttle structure initialization */  
> >
> > I'm still thinking this comment paraphrases the code :)  
> I missed it!:)
> >  
> >> +    s->ctx.fst = &fse->fst;
> >> +
> >>      v9fs_path_free(&path);
> >>
> >>      rc = 0;
> >> @@ -3504,6 +3508,9 @@ out:
> >>
> >>  void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
> >>  {
> >> +    if (fsdev_throttle_enabled(s->ctx.fst)) {
> >> +        fsdev_throttle_cleanup(s->ctx.fst);
> >> +    }
> >>      g_free(s->ctx.fs_root);
> >>      g_free(s->tag);
> >>  }
> >> diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
> >> index 10343c0..65897d5 100644
> >> --- a/hw/9pfs/cofile.c
> >> +++ b/hw/9pfs/cofile.c
> >> @@ -16,6 +16,7 @@
> >>  #include "qemu/thread.h"
> >>  #include "qemu/coroutine.h"
> >>  #include "coth.h"
> >> +#include "fsdev/qemu-fsdev-throttle.h"
> >>
> >>  int v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t st_mode,
> >>                     V9fsStatDotl *v9stat)
> >> @@ -242,6 +243,7 @@ int v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp,
> >>      int err;
> >>      V9fsState *s = pdu->s;
> >>
> >> +    fsdev_throttle_request(s->ctx.fst, true, iov->iov_len);  
> >
> > I think it would make more sense to move this after the cancellation point.  
> Done!
> >  
> >>      if (v9fs_request_cancelled(pdu)) {
> >>          return -EINTR;
> >>      }
> >> @@ -261,6 +263,7 @@ int v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp,
> >>      int err;
> >>      V9fsState *s = pdu->s;
> >>
> >> +    fsdev_throttle_request(s->ctx.fst, false, iov->iov_len);  
> >
> > Same here.
> >  
> Done!
> >>      if (v9fs_request_cancelled(pdu)) {
> >>          return -EINTR;
> >>      }  
> >
> > Cheers.
> >
> > --
> > Greg
> >  
> Regards,
> Pradeep
>
Pradeep Jagadeesh Sept. 22, 2016, 11:29 a.m. UTC | #7
Hi Alberto,

> On Fri 16 Sep 2016 10:33:36 AM CEST, Pradeep Jagadeesh wrote:
>
> Hi,
>
> first of all, sorry for the late reply! Here are my comments:
>
>> --- a/fsdev/qemu-fsdev-opts.c
>> +++ b/fsdev/qemu-fsdev-opts.c
>> @@ -37,6 +37,82 @@ static QemuOptsList qemu_fsdev_opts = {
>>          }, {
>>              .name = "sock_fd",
>>              .type = QEMU_OPT_NUMBER,
>> +        }, {
>> +            .name = "throttling.iops-total",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit total I/O operations per second",
>   /*...*/
>> +        },{
>> +            .name = "throttling.iops-size",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "when limiting by iops max size of an I/O in bytes",
>
> It would be nice if we could factor these so we don't have to have them
> twice in the code. I think it should be doable with qemu_opts_append().
>
> It can be done later in a separate patch if you prefer.
>
>> +typedef struct FsThrottle {
>> +    ThrottleState ts;
>> +    ThrottleTimers tt;
>> +    AioContext   *aioctx;
>> +    ThrottleConfig cfg;
>> +    bool enabled;
>> +    CoQueue      throttled_reqs[2];
>> +    unsigned     pending_reqs[2];
>> +    bool any_timer_armed[2];
>> +    QemuMutex lock;
>> +} FsThrottle;
>
> You based your implementation on block/throttle-groups.c. I think yours
> can be made simpler because one of the problems with mine is that it
> needs to support multiple parallel I/O operations on the same throttling
> group, and that's why the locking rules are more complex. With a single
> user per ThrottleConfig this is not necessary.
>
> Have you checked if you really need them? My impression is that you
> might be able to get rid of the 'lock', 'any_timer_armed' and
> 'pending_reqs' fields.
>
> Please check commit 76f4afb40fa076ed23fe0ab42c7a768ddb71123f to see how
> was the transition from single-drive throttling to group throttling,
> specifically the bdrv_io_limits_intercept() function. You will see that
> it was simpler.

I cleaned up the code as you suggested, its working fine.

-Pradeep

>
>> @@ -436,8 +436,8 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
>>                               const struct iovec *iov,
>>                               int iovcnt, off_t offset)
>>  {
>> -    ssize_t ret
>> -;
>> +    ssize_t ret;
>> +
>
> This could go to a separate patch, but I'm fine if you include it in
> this one.
>
>> @@ -1213,6 +1213,8 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>>      const char *sec_model = qemu_opt_get(opts, "security_model");
>>      const char *path = qemu_opt_get(opts, "path");
>>
>> +    FsThrottle *fst = &fse->fst;
>> +
>
> I'd remove the empty line between the declarations of 'path' and 'fst',
> however...
>
>>      if (!sec_model) {
>>          error_report("Security model not specified, local fs needs security model");
>>          error_printf("valid options are:"
>> @@ -1240,6 +1242,11 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>>          error_report("fsdev: No path specified");
>>          return -1;
>>      }
>> +
>> +    if (fsdev_throttle_configure_iolimits(opts, fst) < 0) {
>> +        return -1;
>> +    }
>
> ...you don't need to declare fst here, you could also pass &fse->fst
> directly.
>
> Berto
>
Pradeep Jagadeesh Sept. 22, 2016, 11:46 a.m. UTC | #8
Hi Greg,

> On Wed, 21 Sep 2016 12:22:25 +0200
> Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com> wrote:
>
>> Hi Greg,
>>
>> Thanks for having a look at patchset.
>> See the replies below.
>>
>>> On Fri, 16 Sep 2016 04:33:36 -0400
>>> Pradeep Jagadeesh <pradeepkiruvale@gmail.com> wrote:
>>>
>>>> Uses throttling APIs to limit I/O bandwidth and number of operations on the
>>>> fsdev devices.
>>>>
>>>> Signed-off-by: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
>>>> ---
>>>
>>> Hi Pradeep,
>>>
>>> Please find some comments below.
>>>
>>>>  fsdev/Makefile.objs         |   1 +
>>>>  fsdev/file-op-9p.h          |   3 +
>>>>  fsdev/qemu-fsdev-opts.c     |  76 ++++++++++++++++++
>>>>  fsdev/qemu-fsdev-throttle.c | 191 ++++++++++++++++++++++++++++++++++++++++++++
>>>>  fsdev/qemu-fsdev-throttle.h |  42 ++++++++++
>>>
>>> Is the qemu- prefix really needed ?
>> Just followed the previous file naming convention,
>> like qemu-fsdev-opts.c
>>>
>>>>  hw/9pfs/9p-local.c          |  11 ++-
>>>>  hw/9pfs/9p.c                |   7 ++
>>>>  hw/9pfs/cofile.c            |   3 +
>>>>  8 files changed, 332 insertions(+), 2 deletions(-)
>>>>  create mode 100644 fsdev/qemu-fsdev-throttle.c
>>>>  create mode 100644 fsdev/qemu-fsdev-throttle.h
>>>>
>>>> This adds the support for the 9p-local driver.
>>>> For now this functionality can be enabled only through qemu cli options.
>>>> QMP interface and support to other drivers need further extensions.
>>>> To make it simple for other drivers, the throttle code has been put in
>>>> separate files.
>>>>
>>>> v1 -> v2:
>>>>
>>>> -Fixed FsContext redeclaration issue
>>>> -Removed couple of function declarations from 9p-throttle.h
>>>> -Fixed some of the .help messages
>>>>
>>>> v2 -> v3:
>>>>
>>>> -Addressed follwing comments by Claudio Fontana
>>>>  -Removed redundant memset calls in fsdev_throttle_configure_iolimits function
>>>>  -Checking throttle structure validity before initializing other structures
>>>>   in fsdev_throttle_configure_iolimits
>>>>
>>>> -Addressed following comments by Greg Kurz
>>>>  -Moved the code from 9pfs directory to fsdev directory, because the throttling
>>>>   is for the fsdev devices.Renamed the files and functions to fsdev_ from 9pfs_
>>>>  -Renamed throttling cli options to throttling.*, as in QMP cli options
>>>>  -Removed some of the unwanted .h files from qemu-fsdev-throttle.[ch]
>>>>  -Using throttle_enabled() function to set the thottle enabled flag for fsdev.
>>>>
>>>> diff --git a/fsdev/Makefile.objs b/fsdev/Makefile.objs
>>>> index 1b120a4..2c6da2d 100644
>>>> --- a/fsdev/Makefile.objs
>>>> +++ b/fsdev/Makefile.objs
>>>> @@ -7,6 +7,7 @@ common-obj-y = qemu-fsdev-dummy.o
>>>>  endif
>>>>  common-obj-y += qemu-fsdev-opts.o
>>>>
>>>> +common-obj-y += qemu-fsdev-throttle.o
>>>>  # Toplevel always builds this; targets without virtio will put it in
>>>>  # common-obj-y
>>>>  common-obj-$(CONFIG_ALL) += qemu-fsdev-dummy.o
>>>> diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
>>>> index 6db9fea..33fe822 100644
>>>> --- a/fsdev/file-op-9p.h
>>>> +++ b/fsdev/file-op-9p.h
>>>> @@ -17,6 +17,7 @@
>>>>  #include <dirent.h>
>>>>  #include <utime.h>
>>>>  #include <sys/vfs.h>
>>>> +#include "qemu-fsdev-throttle.h"
>>>>
>>>>  #define SM_LOCAL_MODE_BITS    0600
>>>>  #define SM_LOCAL_DIR_MODE_BITS    0700
>>>> @@ -74,6 +75,7 @@ typedef struct FsDriverEntry {
>>>>      char *path;
>>>>      int export_flags;
>>>>      FileOperations *ops;
>>>> +    FsThrottle fst;
>>>>  } FsDriverEntry;
>>>>
>>>>  typedef struct FsContext
>>>> @@ -83,6 +85,7 @@ typedef struct FsContext
>>>>      int export_flags;
>>>>      struct xattr_operations **xops;
>>>>      struct extended_ops exops;
>>>> +    FsThrottle *fst;
>>>>      /* fs driver specific data */
>>>>      void *private;
>>>>  } FsContext;
>>>> diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c
>>>> index 1dd8c7a..395d497 100644
>>>> --- a/fsdev/qemu-fsdev-opts.c
>>>> +++ b/fsdev/qemu-fsdev-opts.c
>>>> @@ -37,6 +37,82 @@ static QemuOptsList qemu_fsdev_opts = {
>>>>          }, {
>>>>              .name = "sock_fd",
>>>>              .type = QEMU_OPT_NUMBER,
>>>> +        }, {
>>>> +            .name = "throttling.iops-total",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "limit total I/O operations per second",
>>>> +        },{
>>>> +            .name = "throttling.iops-read",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "limit read operations per second",
>>>> +        },{
>>>> +            .name = "throttling.iops-write",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "limit write operations per second",
>>>> +        },{
>>>> +            .name = "throttling.bps-total",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "limit total bytes per second",
>>>> +        },{
>>>> +            .name = "throttling.bps-read",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "limit read bytes per second",
>>>> +        },{
>>>> +            .name = "throttling.bps-write",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "limit write bytes per second",
>>>> +        },{
>>>> +            .name = "throttling.iops-total-max",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "I/O operations burst",
>>>> +        },{
>>>> +            .name = "throttling.iops-read-max",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "I/O operations read burst",
>>>> +        },{
>>>> +            .name = "throttling.iops-write-max",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "I/O operations write burst",
>>>> +        },{
>>>> +            .name = "throttling.bps-total-max",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "total bytes burst",
>>>> +        },{
>>>> +            .name = "throttling.bps-read-max",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "total bytes read burst",
>>>> +        },{
>>>> +            .name = "throttling.bps-write-max",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "total bytes write burst",
>>>> +        },{
>>>> +            .name = "throttling.iops-total-max-length",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "length of the iops-total-max burst period, in seconds",
>>>> +        },{
>>>> +            .name = "throttling.iops-read-max-length",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "length of the iops-read-max burst period, in seconds",
>>>> +        },{
>>>> +            .name = "throttling.iops-write-max-length",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "length of the iops-write-max burst period, in seconds",
>>>> +        },{
>>>> +            .name = "throttling.bps-total-max-length",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "length of the bps-total-max burst period, in seconds",
>>>> +        },{
>>>> +            .name = "throttling.bps-read-max-length",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "length of the bps-read-max burst period, in seconds",
>>>> +        },{
>>>> +            .name = "throttling.bps-write-max-length",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "length of the bps-write-max burst period, in seconds",
>>>> +        },{
>>>> +            .name = "throttling.iops-size",
>>>> +            .type = QEMU_OPT_NUMBER,
>>>> +            .help = "when limiting by iops max size of an I/O in bytes",
>>>>          },
>>>>
>>>>          { /*End of list */ }
>>>> diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c
>>>> new file mode 100644
>>>> index 0000000..0adccd1
>>>> --- /dev/null
>>>> +++ b/fsdev/qemu-fsdev-throttle.c
>>>> @@ -0,0 +1,191 @@
>>>> +/*
>>>> + * Fsdev Throttle
>>>> + *
>>>> + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
>>>> + *
>>>> + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
>>>> + *
>>>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>>>> + * (at your option) any later version.
>>>> + *
>>>> + * See the COPYING file in the top-level directory for details.
>>>> + *
>>>> + */
>>>> +
>>>> +#include "qemu/osdep.h"
>>>> +#include "qemu/error-report.h"
>>>> +#include "qapi/error.h"
>>>> +#include "qemu-fsdev-throttle.h"
>>>> +
>>>> +static bool fsdev_throttle_check_for_wait(FsThrottle *fst, bool is_write)
>>>> +{
>>>> +    if (fst->any_timer_armed[is_write]) {
>>>> +        return true;
>>>> +    } else {
>>>> +        return throttle_schedule_timer(&fst->ts, &fst->tt, is_write);
>>>> +    }
>>>> +}
>>>> +
>>>> +static void fsdev_throttle_schedule_next_request(FsThrottle *fst, bool is_write)
>>>> +{
>>>> +    bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);
>>>
>>> Let's add a newline here.
>> Done!
>>>
>>>> +    if (!fst->pending_reqs[is_write]) {
>>>> +        return;
>>>> +    }
>>>> +    if (!must_wait) {
>>>> +        if (qemu_in_coroutine() &&
>>>> +            qemu_co_queue_next(&fst->throttled_reqs[is_write])) {
>>>> +            ;
>>>> +       } else {
>>>> +           int64_t now = qemu_clock_get_ns(fst->tt.clock_type);
>>>> +           timer_mod(fst->tt.timers[is_write], now + 1);
>>>> +           fst->any_timer_armed[is_write] = true;
>>>> +       }
>>>> +   }
>>>> +}
>>>> +
>>>> +static void fsdev_throttle_timer_cb(FsThrottle *fst, bool is_write)
>>>> +{
>>>> +    bool empty_queue;
>>>
>>> Same here.
>> Done!
>>>
>>>> +    qemu_mutex_lock(&fst->lock);
>>>> +    fst->any_timer_armed[is_write] = false;
>>>> +    qemu_mutex_unlock(&fst->lock);
>>>> +    empty_queue = !qemu_co_enter_next(&fst->throttled_reqs[is_write]);
>>>> +    if (empty_queue) {
>>>> +        qemu_mutex_lock(&fst->lock);
>>>> +        fsdev_throttle_schedule_next_request(fst, is_write);
>>>> +        qemu_mutex_unlock(&fst->lock);
>>>> +    }
>>>> +}
>>>> +
>>>> +bool fsdev_throttle_enabled(FsThrottle *fst)
>>>> +{
>>>> +    return fst->enabled;
>>>> +}
>>>> +
>>>> +static void fsdev_iolimits_enable(FsThrottle *fst)
>>>> +{
>>>> +    fst->enabled = true;
>>>> +}
>>>> +
>>>> +static void fsdev_iolimits_disable(FsThrottle *fst)
>>>> +{
>>>> +    fst->enabled = false;
>>>> +}
>>>> +
>>>
>>> fsdev_throttle_configure_iolimits() is the only function that will ever
>>> write to fst->enabled. The helpers seem to indicate the opposite. It is
>>> better to open code in fsdev_throttle_configure_iolimits().
>> You mean just use  fst->enabled = true; inside the
>> fsdev_throttle_configure_iolimits().?
>> If that is the case, I thought these helpers are useful in future when
>> we have qmp interfaces to enable,disable the throttle dynamically/run-time.
>
> Hmm... in this case, something looks wrong.
>
> If we start with a valid throttling config, then disable it with QMP and then
> unrealize the device, we won't do the cleanup... fst->enabled looks like
> a premature optimization which causes confusion actually.
>
> I'd prefer fsdev to behave like blockdev, which doesn't have an enabled flag.
>
> Please drop it completely. :)
Done!
>
>>>
>>>> +static void fsdev_throttle_read_timer_cb(void *opaque)
>>>> +{
>>>> +    fsdev_throttle_timer_cb(opaque, false);
>>>> +}
>>>> +
>>>> +static void fsdev_throttle_write_timer_cb(void *opaque)
>>>> +{
>>>> +    fsdev_throttle_timer_cb(opaque, true);
>>>> +}
>>>> +
>>>> +static void fsdev_parse_io_limit_opts(QemuOpts *opts, FsThrottle *fst)
>>>> +{
>>>> +    fst->cfg.buckets[THROTTLE_BPS_TOTAL].avg =
>>>> +          qemu_opt_get_number(opts, "throttling.bps-total", 0);
>>>> +    fst->cfg.buckets[THROTTLE_BPS_READ].avg  =
>>>> +          qemu_opt_get_number(opts, "throttling.bps-read", 0);
>>>> +    fst->cfg.buckets[THROTTLE_BPS_WRITE].avg =
>>>> +          qemu_opt_get_number(opts, "throttling.bps-write", 0);
>>>> +    fst->cfg.buckets[THROTTLE_OPS_TOTAL].avg =
>>>> +          qemu_opt_get_number(opts, "throttling.iops-total", 0);
>>>> +    fst->cfg.buckets[THROTTLE_OPS_READ].avg =
>>>> +          qemu_opt_get_number(opts, "throttling.iops-read", 0);
>>>> +    fst->cfg.buckets[THROTTLE_OPS_WRITE].avg =
>>>> +          qemu_opt_get_number(opts, "throttling.iops-write", 0);
>>>> +
>>>> +    fst->cfg.buckets[THROTTLE_BPS_TOTAL].max =
>>>> +          qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
>>>> +    fst->cfg.buckets[THROTTLE_BPS_READ].max  =
>>>> +          qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
>>>> +    fst->cfg.buckets[THROTTLE_BPS_WRITE].max =
>>>> +          qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
>>>> +    fst->cfg.buckets[THROTTLE_OPS_TOTAL].max =
>>>> +          qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
>>>> +    fst->cfg.buckets[THROTTLE_OPS_READ].max =
>>>> +          qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
>>>> +    fst->cfg.buckets[THROTTLE_OPS_WRITE].max =
>>>> +          qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
>>>> +
>>>> +    fst->cfg.buckets[THROTTLE_BPS_TOTAL].burst_length =
>>>> +          qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
>>>> +    fst->cfg.buckets[THROTTLE_BPS_READ].burst_length  =
>>>> +          qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
>>>> +    fst->cfg.buckets[THROTTLE_BPS_WRITE].burst_length =
>>>> +          qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
>>>> +    fst->cfg.buckets[THROTTLE_OPS_TOTAL].burst_length =
>>>> +          qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
>>>> +    fst->cfg.buckets[THROTTLE_OPS_READ].burst_length =
>>>> +          qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
>>>> +    fst->cfg.buckets[THROTTLE_OPS_WRITE].burst_length =
>>>> +          qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
>>>> +    fst->cfg.op_size =
>>>> +          qemu_opt_get_number(opts, "throttling.iops-size", 0);
>>>> +}
>>>> +
>>>> +int fsdev_throttle_configure_iolimits(QemuOpts *opts, FsThrottle *fst)
>>>> +{
>>>> +    Error *err = NULL;
>>>> +
>>>> +    /* Parse and set populate the cfg structure */
>>>> +    fsdev_parse_io_limit_opts(opts, fst);
>>>> +
>>>> +    if (!throttle_is_valid(&fst->cfg, &err)) {
>>>> +        error_report("Throttle configuration is not valid");
>>>
>>> throttle_is_valid() already populates err with a meaningful message and
>>> you should use it:
>>>
>>>         error_reportf_err(err, "Throttle configuration is not valid: ");
>>>
>> OK,fixed.
>>>> +        return -1;
>>>> +    }
>>>> +    if (throttle_enabled(&fst->cfg)) {
>>>> +        fst->aioctx = qemu_get_aio_context();
>>>> +        if (!fst->aioctx) {
>>>> +            error_report("Failed to create AIO Context");
>>>> +            return -1;
>>>> +        }
>>>
>>> qemu_get_aio_context() returns qemu_aio_context which is initialized in
>>> qemu_init_main_loop(). It is guarantied to be non NULL and if it is, we'd
>>> better dump core.
>>>
>> OK, Is there any specific dump api that exists in qemu?
>
> QEMU uses g_assert() for elaborate checks but you really don't need to check
> the return value of qemu_get_aio_context(). Just grep through the code to
> be convinced :)
I did change it to g_assert((fst->aioctx = qemu_get_aio_context()));
is that ok?
>
>>>> +        throttle_init(&fst->ts);
>>>> +        throttle_timers_init(&fst->tt,
>>>> +                             fst->aioctx,
>>>> +                             QEMU_CLOCK_REALTIME,
>>>> +                             fsdev_throttle_read_timer_cb,
>>>> +                             fsdev_throttle_write_timer_cb,
>>>> +                             fst);
>>>> +        throttle_config(&fst->ts, &fst->tt, &fst->cfg);
>>>> +        qemu_co_queue_init(&fst->throttled_reqs[0]);
>>>> +        qemu_co_queue_init(&fst->throttled_reqs[1]);
>>>> +        fst->pending_reqs[0] = 0;
>>>> +        fst->pending_reqs[1] = 0;
>>>> +        fst->any_timer_armed[0] = false;
>>>> +        fst->any_timer_armed[1] = false;
>>>> +        qemu_mutex_init(&fst->lock);
>>>> +        fsdev_iolimits_enable(fst);
>>>> +   } else {
>>>> +        fsdev_iolimits_disable(fst);
>>>> +   }
>>>> +   return 0;
>>>> +}
>>>> +
>>>> +void fsdev_throttle_request(FsThrottle *fst, bool is_write, ssize_t bytes)
>>>> +{
>>>> +    if (fsdev_throttle_enabled(fst)) {
>>>> +        qemu_mutex_lock(&fst->lock);
>>>> +        bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);
>>>
>>> Even if the QEMU code is supposed to follow C99, I think it is prettier to
>>> declare variables at the beginning of the code block.
>>>
>>>            bool must_wait;
>> OK,fixed.
>>>
>>>            qemu_mutex_lock(&fst->lock);
>>>            must_wait = fsdev_throttle_check_for_wait(fst, is_write);
>>>
>>>> +        if (must_wait || fst->pending_reqs[is_write]) {
>>>> +            fst->pending_reqs[is_write]++;
>>>> +            qemu_mutex_unlock(&fst->lock);
>>>> +            qemu_co_queue_wait(&fst->throttled_reqs[is_write]);
>>>> +            qemu_mutex_lock(&fst->lock);
>>>> +            fst->pending_reqs[is_write]--;
>>>> +       }
>>>> +       throttle_account(&fst->ts, is_write, bytes);
>>>> +       fsdev_throttle_schedule_next_request(fst, is_write);
>>>> +       qemu_mutex_unlock(&fst->lock);
>>>> +    }
>>>> +}
>>>> +
>>>> +void fsdev_throttle_cleanup(FsThrottle *fst)
>>>> +{
>>>> +    throttle_timers_destroy(&fst->tt);
>>>> +    qemu_mutex_destroy(&fst->lock);
>>>> +}
>>>> diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h
>>>> new file mode 100644
>>>> index 0000000..73dd367
>>>> --- /dev/null
>>>> +++ b/fsdev/qemu-fsdev-throttle.h
>>>> @@ -0,0 +1,42 @@
>>>> +/*
>>>> + * Fsdev Throttle
>>>> + *
>>>> + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
>>>> + *
>>>> + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
>>>> + *
>>>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>>>> + * (at your option) any later version.
>>>> + *
>>>> + * See the COPYING file in the top-level directory for details.
>>>> + *
>>>> + */
>>>> +
>>>> +#ifndef _FSDEV_THROTTLE_H
>>>> +#define _FSDEV_THROTTLE_H
>>>> +
>>>> +#include "block/aio.h"
>>>> +#include "qemu/main-loop.h"
>>>> +#include "qemu/coroutine.h"
>>>> +#include "qemu/throttle.h"
>>>> +
>>>> +typedef struct FsThrottle {
>>>> +    ThrottleState ts;
>>>> +    ThrottleTimers tt;
>>>> +    AioContext   *aioctx;
>>>> +    ThrottleConfig cfg;
>>>> +    bool enabled;
>>>> +    CoQueue      throttled_reqs[2];
>>>> +    unsigned     pending_reqs[2];
>>>> +    bool any_timer_armed[2];
>>>> +    QemuMutex lock;
>>>> +} FsThrottle;
>>>> +
>>>> +bool fsdev_throttle_enabled(FsThrottle *);
>>>> +
>>>> +int fsdev_throttle_configure_iolimits(QemuOpts *, FsThrottle *);
>>>> +
>>>> +void fsdev_throttle_request(FsThrottle *, bool , ssize_t);
>>>> +
>>>> +void fsdev_throttle_cleanup(FsThrottle *);
>>>> +#endif /* _FSDEV_THROTTLE_H */
>>>> diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
>>>> index 3f271fc..b1009fc 100644
>>>> --- a/hw/9pfs/9p-local.c
>>>> +++ b/hw/9pfs/9p-local.c
>>>> @@ -436,8 +436,8 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
>>>>                               const struct iovec *iov,
>>>>                               int iovcnt, off_t offset)
>>>>  {
>>>> -    ssize_t ret
>>>> -;
>>>> +    ssize_t ret;
>>>> +
>>>>  #ifdef CONFIG_PREADV
>>>>      ret = pwritev(fs->fd, iov, iovcnt, offset);
>>>>  #else
>>>> @@ -1213,6 +1213,8 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>>>>      const char *sec_model = qemu_opt_get(opts, "security_model");
>>>>      const char *path = qemu_opt_get(opts, "path");
>>>>
>>>> +    FsThrottle *fst = &fse->fst;
>>>> +
>>>>      if (!sec_model) {
>>>>          error_report("Security model not specified, local fs needs security model");
>>>>          error_printf("valid options are:"
>>>> @@ -1240,6 +1242,11 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>>>>          error_report("fsdev: No path specified");
>>>>          return -1;
>>>>      }
>>>> +
>>>> +    if (fsdev_throttle_configure_iolimits(opts, fst) < 0) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>>      fse->path = g_strdup(path);
>>>>
>>>>      return 0;
>>>> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
>>>> index dfe293d..3748ba9 100644
>>>> --- a/hw/9pfs/9p.c
>>>> +++ b/hw/9pfs/9p.c
>>>> @@ -3490,6 +3490,10 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp)
>>>>          error_setg(errp, "share path %s is not a directory", fse->path);
>>>>          goto out;
>>>>      }
>>>> +
>>>> +    /* Throttle structure initialization */
>>>
>>> I'm still thinking this comment paraphrases the code :)
>> I missed it!:)
>>>
>>>> +    s->ctx.fst = &fse->fst;
>>>> +
>>>>      v9fs_path_free(&path);
>>>>
>>>>      rc = 0;
>>>> @@ -3504,6 +3508,9 @@ out:
>>>>
>>>>  void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
>>>>  {
>>>> +    if (fsdev_throttle_enabled(s->ctx.fst)) {
>>>> +        fsdev_throttle_cleanup(s->ctx.fst);
>>>> +    }
>>>>      g_free(s->ctx.fs_root);
>>>>      g_free(s->tag);
>>>>  }
>>>> diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
>>>> index 10343c0..65897d5 100644
>>>> --- a/hw/9pfs/cofile.c
>>>> +++ b/hw/9pfs/cofile.c
>>>> @@ -16,6 +16,7 @@
>>>>  #include "qemu/thread.h"
>>>>  #include "qemu/coroutine.h"
>>>>  #include "coth.h"
>>>> +#include "fsdev/qemu-fsdev-throttle.h"
>>>>
>>>>  int v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t st_mode,
>>>>                     V9fsStatDotl *v9stat)
>>>> @@ -242,6 +243,7 @@ int v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp,
>>>>      int err;
>>>>      V9fsState *s = pdu->s;
>>>>
>>>> +    fsdev_throttle_request(s->ctx.fst, true, iov->iov_len);
>>>
>>> I think it would make more sense to move this after the cancellation point.
>> Done!
>>>
>>>>      if (v9fs_request_cancelled(pdu)) {
>>>>          return -EINTR;
>>>>      }
>>>> @@ -261,6 +263,7 @@ int v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp,
>>>>      int err;
>>>>      V9fsState *s = pdu->s;
>>>>
>>>> +    fsdev_throttle_request(s->ctx.fst, false, iov->iov_len);
>>>
>>> Same here.
>>>
>> Done!
>>>>      if (v9fs_request_cancelled(pdu)) {
>>>>          return -EINTR;
>>>>      }
>>>
>>> Cheers.
>>>
>>> --
>>> Greg
>>>
>> Regards,
>> Pradeep
>>
>
Pradeep Jagadeesh Oct. 17, 2016, 9:19 a.m. UTC | #9
Hi Alberto,
> On Fri 16 Sep 2016 10:33:36 AM CEST, Pradeep Jagadeesh wrote:
>
> Hi,
>
> first of all, sorry for the late reply! Here are my comments:
>
>> --- a/fsdev/qemu-fsdev-opts.c
>> +++ b/fsdev/qemu-fsdev-opts.c
>> @@ -37,6 +37,82 @@ static QemuOptsList qemu_fsdev_opts = {
>>          }, {
>>              .name = "sock_fd",
>>              .type = QEMU_OPT_NUMBER,
>> +        }, {
>> +            .name = "throttling.iops-total",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit total I/O operations per second",
>   /*...*/
>> +        },{
>> +            .name = "throttling.iops-size",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "when limiting by iops max size of an I/O in bytes",
>
> It would be nice if we could factor these so we don't have to have them
> twice in the code. I think it should be doable with qemu_opts_append().
>
> It can be done later in a separate patch if you prefer.
>
>> +typedef struct FsThrottle {
>> +    ThrottleState ts;
>> +    ThrottleTimers tt;
>> +    AioContext   *aioctx;
>> +    ThrottleConfig cfg;
>> +    bool enabled;
>> +    CoQueue      throttled_reqs[2];
>> +    unsigned     pending_reqs[2];
>> +    bool any_timer_armed[2];
>> +    QemuMutex lock;
>> +} FsThrottle;
>
> You based your implementation on block/throttle-groups.c. I think yours
> can be made simpler because one of the problems with mine is that it
> needs to support multiple parallel I/O operations on the same throttling
> group, and that's why the locking rules are more complex. With a single
> user per ThrottleConfig this is not necessary.
>
> Have you checked if you really need them? My impression is that you
> might be able to get rid of the 'lock', 'any_timer_armed' and
> 'pending_reqs' fields.
>
> Please check commit 76f4afb40fa076ed23fe0ab42c7a768ddb71123f to see how
> was the transition from single-drive throttling to group throttling,
> specifically the bdrv_io_limits_intercept() function. You will see that
> it was simpler.

I tried removing the lock, I got into rcu issues, and the qemu hangs.
Once I put them back, it works fine.

-Pradeep
>
>> @@ -436,8 +436,8 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
>>                               const struct iovec *iov,
>>                               int iovcnt, off_t offset)
>>  {
>> -    ssize_t ret
>> -;
>> +    ssize_t ret;
>> +
>
> This could go to a separate patch, but I'm fine if you include it in
> this one.
>
>> @@ -1213,6 +1213,8 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>>      const char *sec_model = qemu_opt_get(opts, "security_model");
>>      const char *path = qemu_opt_get(opts, "path");
>>
>> +    FsThrottle *fst = &fse->fst;
>> +
>
> I'd remove the empty line between the declarations of 'path' and 'fst',
> however...
>
>>      if (!sec_model) {
>>          error_report("Security model not specified, local fs needs security model");
>>          error_printf("valid options are:"
>> @@ -1240,6 +1242,11 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
>>          error_report("fsdev: No path specified");
>>          return -1;
>>      }
>> +
>> +    if (fsdev_throttle_configure_iolimits(opts, fst) < 0) {
>> +        return -1;
>> +    }
>
> ...you don't need to declare fst here, you could also pass &fse->fst
> directly.
>
> Berto
>
Alberto Garcia Oct. 18, 2016, 4:19 p.m. UTC | #10
On Mon 17 Oct 2016 11:19:11 AM CEST, Pradeep Jagadeesh wrote:

>>> +typedef struct FsThrottle {
>>> +    ThrottleState ts;
>>> +    ThrottleTimers tt;
>>> +    AioContext   *aioctx;
>>> +    ThrottleConfig cfg;
>>> +    bool enabled;
>>> +    CoQueue      throttled_reqs[2];
>>> +    unsigned     pending_reqs[2];
>>> +    bool any_timer_armed[2];
>>> +    QemuMutex lock;
>>> +} FsThrottle;
>>
>> You based your implementation on block/throttle-groups.c. I think
>> yours can be made simpler because one of the problems with mine is
>> that it needs to support multiple parallel I/O operations on the same
>> throttling group, and that's why the locking rules are more
>> complex. With a single user per ThrottleConfig this is not necessary.
>>
>> Have you checked if you really need them? My impression is that you
>> might be able to get rid of the 'lock', 'any_timer_armed' and
>> 'pending_reqs' fields.
>>
>> Please check commit 76f4afb40fa076ed23fe0ab42c7a768ddb71123f to see
>> how was the transition from single-drive throttling to group
>> throttling, specifically the bdrv_io_limits_intercept() function. You
>> will see that it was simpler.
>
> I tried removing the lock, I got into rcu issues, and the qemu hangs.
> Once I put them back, it works fine.

Did you figure out why the lock is needed in this case? In the case of
disk group throttling, it is because there is data shared by several
disks which may be running at the same time.

Berto
Pradeep Jagadeesh Oct. 19, 2016, 8:57 a.m. UTC | #11
On 10/18/2016 6:19 PM, Alberto Garcia wrote:
> On Mon 17 Oct 2016 11:19:11 AM CEST, Pradeep Jagadeesh wrote:
>
>>>> +typedef struct FsThrottle {
>>>> +    ThrottleState ts;
>>>> +    ThrottleTimers tt;
>>>> +    AioContext   *aioctx;
>>>> +    ThrottleConfig cfg;
>>>> +    bool enabled;
>>>> +    CoQueue      throttled_reqs[2];
>>>> +    unsigned     pending_reqs[2];
>>>> +    bool any_timer_armed[2];
>>>> +    QemuMutex lock;
>>>> +} FsThrottle;
>>>
>>> You based your implementation on block/throttle-groups.c. I think
>>> yours can be made simpler because one of the problems with mine is
>>> that it needs to support multiple parallel I/O operations on the same
>>> throttling group, and that's why the locking rules are more
>>> complex. With a single user per ThrottleConfig this is not necessary.
>>>
>>> Have you checked if you really need them? My impression is that you
>>> might be able to get rid of the 'lock', 'any_timer_armed' and
>>> 'pending_reqs' fields.
>>>
>>> Please check commit 76f4afb40fa076ed23fe0ab42c7a768ddb71123f to see
>>> how was the transition from single-drive throttling to group
>>> throttling, specifically the bdrv_io_limits_intercept() function. You
>>> will see that it was simpler.
>>
>> I tried removing the lock, I got into rcu issues, and the qemu hangs.
>> Once I put them back, it works fine.
>
> Did you figure out why the lock is needed in this case? In the case of
> disk group throttling, it is because there is data shared by several
> disks which may be running at the same time.

If I remove the locks and run the IO workloads. I am get a message, 
"main-loop: WARNING: I/O thread spun for 1000 iterations" and then the 
qemu hangs. Even the monitor does not respond.
I found that similar issues faced by many people, also some fixes, none 
of them worked for me.

Regards,
Pradeep

>
> Berto
>
Pradeep Jagadeesh Oct. 19, 2016, 4:29 p.m. UTC | #12
On 10/18/2016 6:19 PM, Alberto Garcia wrote:
> On Mon 17 Oct 2016 11:19:11 AM CEST, Pradeep Jagadeesh wrote:
>
>>>> +typedef struct FsThrottle {
>>>> +    ThrottleState ts;
>>>> +    ThrottleTimers tt;
>>>> +    AioContext   *aioctx;
>>>> +    ThrottleConfig cfg;
>>>> +    bool enabled;
>>>> +    CoQueue      throttled_reqs[2];
>>>> +    unsigned     pending_reqs[2];
>>>> +    bool any_timer_armed[2];
>>>> +    QemuMutex lock;
>>>> +} FsThrottle;
>>>
>>> You based your implementation on block/throttle-groups.c. I think
>>> yours can be made simpler because one of the problems with mine is
>>> that it needs to support multiple parallel I/O operations on the same
>>> throttling group, and that's why the locking rules are more
>>> complex. With a single user per ThrottleConfig this is not necessary.
>>>
>>> Have you checked if you really need them? My impression is that you
>>> might be able to get rid of the 'lock', 'any_timer_armed' and
>>> 'pending_reqs' fields.
>>>
>>> Please check commit 76f4afb40fa076ed23fe0ab42c7a768ddb71123f to see
>>> how was the transition from single-drive throttling to group
>>> throttling, specifically the bdrv_io_limits_intercept() function. You
>>> will see that it was simpler.
>>
>> I tried removing the lock, I got into rcu issues, and the qemu hangs.
>> Once I put them back, it works fine.
>
> Did you figure out why the lock is needed in this case? In the case of
> disk group throttling, it is because there is data shared by several
> disks which may be running at the same time.
>
One more update is, I had a look at bdrv_io_limits_intercept and  I just 
removed lock and retained other two things pending_reqs,any_timer_armed
It works fine.

-Pradeep

> Berto
>
Alberto Garcia Oct. 20, 2016, 12:50 p.m. UTC | #13
On Wed 19 Oct 2016 06:29:45 PM CEST, Pradeep Jagadeesh wrote:

> One more update is, I had a look at bdrv_io_limits_intercept and I
> just removed lock and retained other two things
> pending_reqs,any_timer_armed It works fine.

So you managed to make it work without the lock? Can you post the new
version then?

Thanks,

Berto
Greg Kurz Oct. 20, 2016, 1:32 p.m. UTC | #14
On Thu, 20 Oct 2016 14:50:29 +0200
Alberto Garcia <berto@igalia.com> wrote:

> On Wed 19 Oct 2016 06:29:45 PM CEST, Pradeep Jagadeesh wrote:
> 
> > One more update is, I had a look at bdrv_io_limits_intercept and I
> > just removed lock and retained other two things
> > pending_reqs,any_timer_armed It works fine.  
> 
> So you managed to make it work without the lock? Can you post the new
> version then?
> 
> Thanks,
> 
> Berto

Pradeep,

Just a reminder: soft freeze is coming (http://wiki.qemu.org/Planning/2.8).
Unless this series is posted/reviewed in time (especially all the issues about
locking and throttling not behaving as expected have been addressed and well
tested), I won't be able to push it to 2.8.

Cheers.

--
Greg
Pradeep Jagadeesh Oct. 20, 2016, 1:48 p.m. UTC | #15
On 10/20/2016 2:50 PM, Alberto Garcia wrote:
> On Wed 19 Oct 2016 06:29:45 PM CEST, Pradeep Jagadeesh wrote:
>
>> One more update is, I had a look at bdrv_io_limits_intercept and I
>> just removed lock and retained other two things
>> pending_reqs,any_timer_armed It works fine.
>
> So you managed to make it work without the lock? Can you post the new
> version then?

Sure, I will send soon.

-Pradeep

>
> Thanks,
>
> Berto
>
Pradeep Jagadeesh Oct. 20, 2016, 1:49 p.m. UTC | #16
On 10/20/2016 3:32 PM, Greg Kurz wrote:
> On Thu, 20 Oct 2016 14:50:29 +0200
> Alberto Garcia <berto@igalia.com> wrote:
>
>> On Wed 19 Oct 2016 06:29:45 PM CEST, Pradeep Jagadeesh wrote:
>>
>>> One more update is, I had a look at bdrv_io_limits_intercept and I
>>> just removed lock and retained other two things
>>> pending_reqs,any_timer_armed It works fine.
>>
>> So you managed to make it work without the lock? Can you post the new
>> version then?
>>
>> Thanks,
>>
>> Berto
>
> Pradeep,
>
> Just a reminder: soft freeze is coming (http://wiki.qemu.org/Planning/2.8).
> Unless this series is posted/reviewed in time (especially all the issues about
> locking and throttling not behaving as expected have been addressed and well
> tested), I won't be able to push it to 2.8.

Hi Greg,

Thanks for the release info.I am sending out patch today.Lets see how 
things go after that.

Regards,
Pradeep
>
> Cheers.
>
> --
> Greg
>
diff mbox

Patch

diff --git a/fsdev/Makefile.objs b/fsdev/Makefile.objs
index 1b120a4..2c6da2d 100644
--- a/fsdev/Makefile.objs
+++ b/fsdev/Makefile.objs
@@ -7,6 +7,7 @@  common-obj-y = qemu-fsdev-dummy.o
 endif
 common-obj-y += qemu-fsdev-opts.o
 
+common-obj-y += qemu-fsdev-throttle.o
 # Toplevel always builds this; targets without virtio will put it in
 # common-obj-y
 common-obj-$(CONFIG_ALL) += qemu-fsdev-dummy.o
diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
index 6db9fea..33fe822 100644
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@@ -17,6 +17,7 @@ 
 #include <dirent.h>
 #include <utime.h>
 #include <sys/vfs.h>
+#include "qemu-fsdev-throttle.h"
 
 #define SM_LOCAL_MODE_BITS    0600
 #define SM_LOCAL_DIR_MODE_BITS    0700
@@ -74,6 +75,7 @@  typedef struct FsDriverEntry {
     char *path;
     int export_flags;
     FileOperations *ops;
+    FsThrottle fst;
 } FsDriverEntry;
 
 typedef struct FsContext
@@ -83,6 +85,7 @@  typedef struct FsContext
     int export_flags;
     struct xattr_operations **xops;
     struct extended_ops exops;
+    FsThrottle *fst;
     /* fs driver specific data */
     void *private;
 } FsContext;
diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c
index 1dd8c7a..395d497 100644
--- a/fsdev/qemu-fsdev-opts.c
+++ b/fsdev/qemu-fsdev-opts.c
@@ -37,6 +37,82 @@  static QemuOptsList qemu_fsdev_opts = {
         }, {
             .name = "sock_fd",
             .type = QEMU_OPT_NUMBER,
+        }, {
+            .name = "throttling.iops-total",
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit total I/O operations per second",
+        },{
+            .name = "throttling.iops-read",
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit read operations per second",
+        },{
+            .name = "throttling.iops-write",
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit write operations per second",
+        },{
+            .name = "throttling.bps-total",
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit total bytes per second",
+        },{
+            .name = "throttling.bps-read",
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit read bytes per second",
+        },{
+            .name = "throttling.bps-write",
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit write bytes per second",
+        },{
+            .name = "throttling.iops-total-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "I/O operations burst",
+        },{
+            .name = "throttling.iops-read-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "I/O operations read burst",
+        },{
+            .name = "throttling.iops-write-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "I/O operations write burst",
+        },{
+            .name = "throttling.bps-total-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "total bytes burst",
+        },{
+            .name = "throttling.bps-read-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "total bytes read burst",
+        },{
+            .name = "throttling.bps-write-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "total bytes write burst",
+        },{
+            .name = "throttling.iops-total-max-length",
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the iops-total-max burst period, in seconds",
+        },{
+            .name = "throttling.iops-read-max-length",
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the iops-read-max burst period, in seconds",
+        },{
+            .name = "throttling.iops-write-max-length",
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the iops-write-max burst period, in seconds",
+        },{
+            .name = "throttling.bps-total-max-length",
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the bps-total-max burst period, in seconds",
+        },{
+            .name = "throttling.bps-read-max-length",
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the bps-read-max burst period, in seconds",
+        },{
+            .name = "throttling.bps-write-max-length",
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the bps-write-max burst period, in seconds",
+        },{
+            .name = "throttling.iops-size",
+            .type = QEMU_OPT_NUMBER,
+            .help = "when limiting by iops max size of an I/O in bytes",
         },
 
         { /*End of list */ }
diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c
new file mode 100644
index 0000000..0adccd1
--- /dev/null
+++ b/fsdev/qemu-fsdev-throttle.c
@@ -0,0 +1,191 @@ 
+/*
+ * Fsdev Throttle
+ *
+ * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu-fsdev-throttle.h"
+
+static bool fsdev_throttle_check_for_wait(FsThrottle *fst, bool is_write)
+{
+    if (fst->any_timer_armed[is_write]) {
+        return true;
+    } else {
+        return throttle_schedule_timer(&fst->ts, &fst->tt, is_write);
+    }
+}
+
+static void fsdev_throttle_schedule_next_request(FsThrottle *fst, bool is_write)
+{
+    bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);
+    if (!fst->pending_reqs[is_write]) {
+        return;
+    }
+    if (!must_wait) {
+        if (qemu_in_coroutine() &&
+            qemu_co_queue_next(&fst->throttled_reqs[is_write])) {
+            ;
+       } else {
+           int64_t now = qemu_clock_get_ns(fst->tt.clock_type);
+           timer_mod(fst->tt.timers[is_write], now + 1);
+           fst->any_timer_armed[is_write] = true;
+       }
+   }
+}
+
+static void fsdev_throttle_timer_cb(FsThrottle *fst, bool is_write)
+{
+    bool empty_queue;
+    qemu_mutex_lock(&fst->lock);
+    fst->any_timer_armed[is_write] = false;
+    qemu_mutex_unlock(&fst->lock);
+    empty_queue = !qemu_co_enter_next(&fst->throttled_reqs[is_write]);
+    if (empty_queue) {
+        qemu_mutex_lock(&fst->lock);
+        fsdev_throttle_schedule_next_request(fst, is_write);
+        qemu_mutex_unlock(&fst->lock);
+    }
+}
+
+bool fsdev_throttle_enabled(FsThrottle *fst)
+{
+    return fst->enabled;
+}
+
+static void fsdev_iolimits_enable(FsThrottle *fst)
+{
+    fst->enabled = true;
+}
+
+static void fsdev_iolimits_disable(FsThrottle *fst)
+{
+    fst->enabled = false;
+}
+
+static void fsdev_throttle_read_timer_cb(void *opaque)
+{
+    fsdev_throttle_timer_cb(opaque, false);
+}
+
+static void fsdev_throttle_write_timer_cb(void *opaque)
+{
+    fsdev_throttle_timer_cb(opaque, true);
+}
+
+static void fsdev_parse_io_limit_opts(QemuOpts *opts, FsThrottle *fst)
+{
+    fst->cfg.buckets[THROTTLE_BPS_TOTAL].avg =
+          qemu_opt_get_number(opts, "throttling.bps-total", 0);
+    fst->cfg.buckets[THROTTLE_BPS_READ].avg  =
+          qemu_opt_get_number(opts, "throttling.bps-read", 0);
+    fst->cfg.buckets[THROTTLE_BPS_WRITE].avg =
+          qemu_opt_get_number(opts, "throttling.bps-write", 0);
+    fst->cfg.buckets[THROTTLE_OPS_TOTAL].avg =
+          qemu_opt_get_number(opts, "throttling.iops-total", 0);
+    fst->cfg.buckets[THROTTLE_OPS_READ].avg =
+          qemu_opt_get_number(opts, "throttling.iops-read", 0);
+    fst->cfg.buckets[THROTTLE_OPS_WRITE].avg =
+          qemu_opt_get_number(opts, "throttling.iops-write", 0);
+
+    fst->cfg.buckets[THROTTLE_BPS_TOTAL].max =
+          qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
+    fst->cfg.buckets[THROTTLE_BPS_READ].max  =
+          qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
+    fst->cfg.buckets[THROTTLE_BPS_WRITE].max =
+          qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
+    fst->cfg.buckets[THROTTLE_OPS_TOTAL].max =
+          qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
+    fst->cfg.buckets[THROTTLE_OPS_READ].max =
+          qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
+    fst->cfg.buckets[THROTTLE_OPS_WRITE].max =
+          qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
+
+    fst->cfg.buckets[THROTTLE_BPS_TOTAL].burst_length =
+          qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
+    fst->cfg.buckets[THROTTLE_BPS_READ].burst_length  =
+          qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
+    fst->cfg.buckets[THROTTLE_BPS_WRITE].burst_length =
+          qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
+    fst->cfg.buckets[THROTTLE_OPS_TOTAL].burst_length =
+          qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
+    fst->cfg.buckets[THROTTLE_OPS_READ].burst_length =
+          qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
+    fst->cfg.buckets[THROTTLE_OPS_WRITE].burst_length =
+          qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
+    fst->cfg.op_size =
+          qemu_opt_get_number(opts, "throttling.iops-size", 0);
+}
+
+int fsdev_throttle_configure_iolimits(QemuOpts *opts, FsThrottle *fst)
+{
+    Error *err = NULL;
+
+    /* Parse and set populate the cfg structure */
+    fsdev_parse_io_limit_opts(opts, fst);
+
+    if (!throttle_is_valid(&fst->cfg, &err)) {
+        error_report("Throttle configuration is not valid");
+        return -1;
+    }
+    if (throttle_enabled(&fst->cfg)) {
+        fst->aioctx = qemu_get_aio_context();
+        if (!fst->aioctx) {
+            error_report("Failed to create AIO Context");
+            return -1;
+        }
+        throttle_init(&fst->ts);
+        throttle_timers_init(&fst->tt,
+                             fst->aioctx,
+                             QEMU_CLOCK_REALTIME,
+                             fsdev_throttle_read_timer_cb,
+                             fsdev_throttle_write_timer_cb,
+                             fst);
+        throttle_config(&fst->ts, &fst->tt, &fst->cfg);
+        qemu_co_queue_init(&fst->throttled_reqs[0]);
+        qemu_co_queue_init(&fst->throttled_reqs[1]);
+        fst->pending_reqs[0] = 0;
+        fst->pending_reqs[1] = 0;
+        fst->any_timer_armed[0] = false;
+        fst->any_timer_armed[1] = false;
+        qemu_mutex_init(&fst->lock);
+        fsdev_iolimits_enable(fst);
+   } else {
+        fsdev_iolimits_disable(fst);
+   }
+   return 0;
+}
+
+void fsdev_throttle_request(FsThrottle *fst, bool is_write, ssize_t bytes)
+{
+    if (fsdev_throttle_enabled(fst)) {
+        qemu_mutex_lock(&fst->lock);
+        bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);
+        if (must_wait || fst->pending_reqs[is_write]) {
+            fst->pending_reqs[is_write]++;
+            qemu_mutex_unlock(&fst->lock);
+            qemu_co_queue_wait(&fst->throttled_reqs[is_write]);
+            qemu_mutex_lock(&fst->lock);
+            fst->pending_reqs[is_write]--;
+       }
+       throttle_account(&fst->ts, is_write, bytes);
+       fsdev_throttle_schedule_next_request(fst, is_write);
+       qemu_mutex_unlock(&fst->lock);
+    }
+}
+
+void fsdev_throttle_cleanup(FsThrottle *fst)
+{
+    throttle_timers_destroy(&fst->tt);
+    qemu_mutex_destroy(&fst->lock);
+}
diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h
new file mode 100644
index 0000000..73dd367
--- /dev/null
+++ b/fsdev/qemu-fsdev-throttle.h
@@ -0,0 +1,42 @@ 
+/*
+ * Fsdev Throttle
+ *
+ * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ */
+
+#ifndef _FSDEV_THROTTLE_H
+#define _FSDEV_THROTTLE_H
+
+#include "block/aio.h"
+#include "qemu/main-loop.h"
+#include "qemu/coroutine.h"
+#include "qemu/throttle.h"
+
+typedef struct FsThrottle {
+    ThrottleState ts;
+    ThrottleTimers tt;
+    AioContext   *aioctx;
+    ThrottleConfig cfg;
+    bool enabled;
+    CoQueue      throttled_reqs[2];
+    unsigned     pending_reqs[2];
+    bool any_timer_armed[2];
+    QemuMutex lock;
+} FsThrottle;
+
+bool fsdev_throttle_enabled(FsThrottle *);
+
+int fsdev_throttle_configure_iolimits(QemuOpts *, FsThrottle *);
+
+void fsdev_throttle_request(FsThrottle *, bool , ssize_t);
+
+void fsdev_throttle_cleanup(FsThrottle *);
+#endif /* _FSDEV_THROTTLE_H */
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 3f271fc..b1009fc 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -436,8 +436,8 @@  static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
                              const struct iovec *iov,
                              int iovcnt, off_t offset)
 {
-    ssize_t ret
-;
+    ssize_t ret;
+
 #ifdef CONFIG_PREADV
     ret = pwritev(fs->fd, iov, iovcnt, offset);
 #else
@@ -1213,6 +1213,8 @@  static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
     const char *sec_model = qemu_opt_get(opts, "security_model");
     const char *path = qemu_opt_get(opts, "path");
 
+    FsThrottle *fst = &fse->fst;
+
     if (!sec_model) {
         error_report("Security model not specified, local fs needs security model");
         error_printf("valid options are:"
@@ -1240,6 +1242,11 @@  static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
         error_report("fsdev: No path specified");
         return -1;
     }
+
+    if (fsdev_throttle_configure_iolimits(opts, fst) < 0) {
+        return -1;
+    }
+
     fse->path = g_strdup(path);
 
     return 0;
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index dfe293d..3748ba9 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -3490,6 +3490,10 @@  int v9fs_device_realize_common(V9fsState *s, Error **errp)
         error_setg(errp, "share path %s is not a directory", fse->path);
         goto out;
     }
+
+    /* Throttle structure initialization */
+    s->ctx.fst = &fse->fst;
+
     v9fs_path_free(&path);
 
     rc = 0;
@@ -3504,6 +3508,9 @@  out:
 
 void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
 {
+    if (fsdev_throttle_enabled(s->ctx.fst)) {
+        fsdev_throttle_cleanup(s->ctx.fst);
+    }
     g_free(s->ctx.fs_root);
     g_free(s->tag);
 }
diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
index 10343c0..65897d5 100644
--- a/hw/9pfs/cofile.c
+++ b/hw/9pfs/cofile.c
@@ -16,6 +16,7 @@ 
 #include "qemu/thread.h"
 #include "qemu/coroutine.h"
 #include "coth.h"
+#include "fsdev/qemu-fsdev-throttle.h"
 
 int v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t st_mode,
                    V9fsStatDotl *v9stat)
@@ -242,6 +243,7 @@  int v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp,
     int err;
     V9fsState *s = pdu->s;
 
+    fsdev_throttle_request(s->ctx.fst, true, iov->iov_len);
     if (v9fs_request_cancelled(pdu)) {
         return -EINTR;
     }
@@ -261,6 +263,7 @@  int v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp,
     int err;
     V9fsState *s = pdu->s;
 
+    fsdev_throttle_request(s->ctx.fst, false, iov->iov_len);
     if (v9fs_request_cancelled(pdu)) {
         return -EINTR;
     }