diff mbox

[V5] fsdev: add IO throttle support to fsdev devices

Message ID 1476972863-22041-1-git-send-email-pradeep.jagadeesh@huawei.com
State New
Headers show

Commit Message

Pradeep Jagadeesh Oct. 20, 2016, 2:14 p.m. UTC
Signed-off-by: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
---
 fsdev/Makefile.objs         |   1 +
 fsdev/file-op-9p.h          |   3 +
 fsdev/qemu-fsdev-opts.c     |  76 ++++++++++++++++++++
 fsdev/qemu-fsdev-throttle.c | 166 ++++++++++++++++++++++++++++++++++++++++++++
 fsdev/qemu-fsdev-throttle.h |  37 ++++++++++
 hw/9pfs/9p-local.c          |   9 ++-
 hw/9pfs/9p.c                |   6 ++
 hw/9pfs/cofile.c            |   5 ++
 8 files changed, 301 insertions(+), 2 deletions(-)
 create mode 100644 fsdev/qemu-fsdev-throttle.c
 create mode 100644 fsdev/qemu-fsdev-throttle.h

This adds the support for the 9p-local driver.
For now this functionality can be enabled only through qemu cli options.
QMP interface and support to other drivers need further extensions.
To make it simple for other drivers, the throttle code has been put in
separate files.

v1 -> v2:

-Fixed FsContext redeclaration issue
-Removed couple of function declarations from 9p-throttle.h
-Fixed some of the .help messages

v2 -> v3:

-Addressed follwing comments by Claudio Fontana
 -Removed redundant memset calls in fsdev_throttle_configure_iolimits function
 -Checking throttle structure validity before initializing other structures
  in fsdev_throttle_configure_iolimits

-Addressed following comments by Greg Kurz
 -Moved the code from 9pfs directory to fsdev directory, because the throttling
  is for the fsdev devices.Renamed the files and functions to fsdev_ from 9pfs_
 -Renamed throttling cli options to throttling.*, as in QMP cli options
 -Removed some of the unwanted .h files from qemu-fsdev-throttle.[ch]
 -Using throttle_enabled() function to set the thottle enabled flag for fsdev.

v3 -> v4:

-Addressed following comments by Alberto Garcia
 -Removed the unwanted locking and other data structures in qemu-fsdev-throttle.[ch]

-Addressed following comments by Greg Kurz
 -Removed fsdev_iolimitsenable/disable functions, instead using throttle_enabled function

v4 -> V5:
 -Fixed the issue with the larger block size accounting.
 (i.e, when the 9pfs mounted using msize=xxx option)

Comments

Alberto Garcia Oct. 21, 2016, 3:02 p.m. UTC | #1
On Thu 20 Oct 2016 04:14:23 PM CEST, Pradeep Jagadeesh wrote:
> +static void fsdev_throttle_schedule_next_request(FsThrottle *fst, bool is_write)
> +{
> +    bool must_wait;
> +
> +    if (qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
> +        return;
> +    }
> +    must_wait = fsdev_throttle_check_for_wait(fst, is_write);
> +    if (!must_wait) {
> +        if (qemu_in_coroutine() &&
> +            qemu_co_queue_next(&fst->throttled_reqs[is_write])) {
> +            ;
> +       } else {
> +           int64_t now = qemu_clock_get_ns(fst->tt.clock_type);
> +           timer_mod(fst->tt.timers[is_write], now + 1);
> +       }
> +   }
> +}

I think you don't need to schedule the timer here. This was added for
group throttling for disks because once a disk completes an I/O request,
the next request might be in a different disk (due to the round-robin
algorithm). So we put a timer in that disk to handle that.

In your case that cannot happen, see fsdev_throttle_request() below[1]
for a suggestion of how it can look like.

> +static void fsdev_throttle_timer_cb(FsThrottle *fst, bool is_write)
> +{
> +    bool empty_queue;
> +    empty_queue = !qemu_co_enter_next(&fst->throttled_reqs[is_write]);
> +    if (empty_queue) {
> +        fsdev_throttle_schedule_next_request(fst, is_write);
> +    }
> +}

This doesn't make any sense, if the throttled_reqs queue is empty then
you're calling fsdev_throttle_schedule_next_request(), but how can a
next request be scheduled if the queue is empty? :-)

You don't need fsdev_throttle_timer_cb() at all, but you can simply do
this instead:

> +static void fsdev_throttle_read_timer_cb(void *opaque)
> +{
       FsThrottle *fst = opaque;
       qemu_co_enter_next(&fst->throttled_reqs[false]);
> +}
> +
> +static void fsdev_throttle_write_timer_cb(void *opaque)
> +{
       FsThrottle *fst = opaque;
       qemu_co_enter_next(&fst->throttled_reqs[true]);
> +}

> +static int get_num_bytes(struct iovec *iov, int iovcnt)
> +{
> +    int index = 0;
> +    int bytes = 0;
> +
> +    while (index < iovcnt) {
> +        bytes += iov[index].iov_len;
> +        index++;
> +    }
> +    return bytes;

You're looping over the iov vector, use a for loop instead, no?:

for (i = 0; i < iovcnt; i++) {
   bytes += iov[i].iov_len;
}

> +void fsdev_throttle_request(FsThrottle *fst, bool is_write,
> +                            struct iovec *iov, int iovcnt)

First, mark the function as running in a coroutine context:

void coroutine_fn fsdev_throttle_request(FsThrottle *fst, bool is_write,
                                         struct iovec *iov, int iovcnt)

> +{
> +    int bytes;
> +
> +    if (throttle_enabled(&fst->cfg)) {

I think it's probably better to check that in the caller, but I don't
have a strong opinion. If you keep it like this, put the definition of
'bytes' inside the if block instead.

> +        bytes = get_num_bytes(iov, iovcnt);
> +        bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);
> +        if (must_wait || !qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
> +            qemu_co_queue_wait(&fst->throttled_reqs[is_write]);
> +       }
> +       throttle_account(&fst->ts, is_write, bytes);
> +
> +       fsdev_throttle_schedule_next_request(fst, is_write);

So this is the [1] that I mentioned earlier. In your case instead of
fsdev_throttle_schedule_next_request() I think you simply something
like:

    if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
       /* If there's a timer set, don't do anything else, the timer
          callback will schedule the next request */
       if (fsdev_throttle_check_for_wait(fst, is_write)) {
           return;
       }

       /* If a timer is not set, then schedule the next request now */
       qemu_co_queue_next(&fst->throttled_reqs[is_write]);
    }

Berto
Alberto Garcia Oct. 21, 2016, 3:05 p.m. UTC | #2
On Fri 21 Oct 2016 05:02:29 PM CEST, Alberto Garcia wrote:
>> +void fsdev_throttle_request(FsThrottle *fst, bool is_write,
>> +                            struct iovec *iov, int iovcnt)
>
> First, mark the function as running in a coroutine context:
>
> void coroutine_fn fsdev_throttle_request(FsThrottle *fst, bool is_write,
>                                          struct iovec *iov, int
>                                          iovcnt)

Sorry, I forgot to update the function name, it should be
fsdev_co_throttle_request()

Berto
Pradeep Jagadeesh Oct. 21, 2016, 4:04 p.m. UTC | #3
Alberto,

Thanks for reviewing the patch.

> On Thu 20 Oct 2016 04:14:23 PM CEST, Pradeep Jagadeesh wrote:
>> +static void fsdev_throttle_schedule_next_request(FsThrottle *fst, bool is_write)
>> +{
>> +    bool must_wait;
>> +
>> +    if (qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
>> +        return;
>> +    }
>> +    must_wait = fsdev_throttle_check_for_wait(fst, is_write);
>> +    if (!must_wait) {
>> +        if (qemu_in_coroutine() &&
>> +            qemu_co_queue_next(&fst->throttled_reqs[is_write])) {
>> +            ;
>> +       } else {
>> +           int64_t now = qemu_clock_get_ns(fst->tt.clock_type);
>> +           timer_mod(fst->tt.timers[is_write], now + 1);
>> +       }
>> +   }
>> +}
>
> I think you don't need to schedule the timer here. This was added for
> group throttling for disks because once a disk completes an I/O request,
> the next request might be in a different disk (due to the round-robin
> algorithm). So we put a timer in that disk to handle that.
>
> In your case that cannot happen, see fsdev_throttle_request() below[1]
> for a suggestion of how it can look like.
OK
>
>> +static void fsdev_throttle_timer_cb(FsThrottle *fst, bool is_write)
>> +{
>> +    bool empty_queue;
>> +    empty_queue = !qemu_co_enter_next(&fst->throttled_reqs[is_write]);
>> +    if (empty_queue) {
>> +        fsdev_throttle_schedule_next_request(fst, is_write);
>> +    }
>> +}
>
> This doesn't make any sense, if the throttled_reqs queue is empty then
> you're calling fsdev_throttle_schedule_next_request(), but how can a
> next request be scheduled if the queue is empty? :-)
>
> You don't need fsdev_throttle_timer_cb() at all, but you can simply do
> this instead:
OK
>
>> +static void fsdev_throttle_read_timer_cb(void *opaque)
>> +{
>        FsThrottle *fst = opaque;
>        qemu_co_enter_next(&fst->throttled_reqs[false]);
>> +}
>> +
>> +static void fsdev_throttle_write_timer_cb(void *opaque)
>> +{
>        FsThrottle *fst = opaque;
>        qemu_co_enter_next(&fst->throttled_reqs[true]);
>> +}
>
>> +static int get_num_bytes(struct iovec *iov, int iovcnt)
>> +{
>> +    int index = 0;
>> +    int bytes = 0;
>> +
>> +    while (index < iovcnt) {
>> +        bytes += iov[index].iov_len;
>> +        index++;
>> +    }
>> +    return bytes;
>
> You're looping over the iov vector, use a for loop instead, no?:
>
> for (i = 0; i < iovcnt; i++) {
>    bytes += iov[i].iov_len;
> }
>
OK
>> +void fsdev_throttle_request(FsThrottle *fst, bool is_write,
>> +                            struct iovec *iov, int iovcnt)
>
> First, mark the function as running in a coroutine context:
>
OK
> void coroutine_fn fsdev_throttle_request(FsThrottle *fst, bool is_write,
>                                          struct iovec *iov, int iovcnt)
>
>> +{
>> +    int bytes;
>> +
>> +    if (throttle_enabled(&fst->cfg)) {
>
> I think it's probably better to check that in the caller, but I don't
> have a strong opinion. If you keep it like this, put the definition of
> 'bytes' inside the if block instead.
>
I just thought its better to check for throttle enabled or not.

>> +        bytes = get_num_bytes(iov, iovcnt);
>> +        bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);
>> +        if (must_wait || !qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
>> +            qemu_co_queue_wait(&fst->throttled_reqs[is_write]);
>> +       }
>> +       throttle_account(&fst->ts, is_write, bytes);
>> +
>> +       fsdev_throttle_schedule_next_request(fst, is_write);
>
> So this is the [1] that I mentioned earlier. In your case instead of
> fsdev_throttle_schedule_next_request() I think you simply something
> like:
>
>     if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
>        /* If there's a timer set, don't do anything else, the timer
>           callback will schedule the next request */
>        if (fsdev_throttle_check_for_wait(fst, is_write)) {
>            return;
>        }
>
>        /* If a timer is not set, then schedule the next request now */
>        qemu_co_queue_next(&fst->throttled_reqs[is_write]);
>     }
>
OK, also I changed the name as you suggested.

-Pradeep
> Berto
>
diff mbox

Patch

diff --git a/fsdev/Makefile.objs b/fsdev/Makefile.objs
index 1b120a4..2c6da2d 100644
--- a/fsdev/Makefile.objs
+++ b/fsdev/Makefile.objs
@@ -7,6 +7,7 @@  common-obj-y = qemu-fsdev-dummy.o
 endif
 common-obj-y += qemu-fsdev-opts.o
 
+common-obj-y += qemu-fsdev-throttle.o
 # Toplevel always builds this; targets without virtio will put it in
 # common-obj-y
 common-obj-$(CONFIG_ALL) += qemu-fsdev-dummy.o
diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
index 6db9fea..33fe822 100644
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@@ -17,6 +17,7 @@ 
 #include <dirent.h>
 #include <utime.h>
 #include <sys/vfs.h>
+#include "qemu-fsdev-throttle.h"
 
 #define SM_LOCAL_MODE_BITS    0600
 #define SM_LOCAL_DIR_MODE_BITS    0700
@@ -74,6 +75,7 @@  typedef struct FsDriverEntry {
     char *path;
     int export_flags;
     FileOperations *ops;
+    FsThrottle fst;
 } FsDriverEntry;
 
 typedef struct FsContext
@@ -83,6 +85,7 @@  typedef struct FsContext
     int export_flags;
     struct xattr_operations **xops;
     struct extended_ops exops;
+    FsThrottle *fst;
     /* fs driver specific data */
     void *private;
 } FsContext;
diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c
index 1dd8c7a..395d497 100644
--- a/fsdev/qemu-fsdev-opts.c
+++ b/fsdev/qemu-fsdev-opts.c
@@ -37,6 +37,82 @@  static QemuOptsList qemu_fsdev_opts = {
         }, {
             .name = "sock_fd",
             .type = QEMU_OPT_NUMBER,
+        }, {
+            .name = "throttling.iops-total",
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit total I/O operations per second",
+        },{
+            .name = "throttling.iops-read",
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit read operations per second",
+        },{
+            .name = "throttling.iops-write",
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit write operations per second",
+        },{
+            .name = "throttling.bps-total",
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit total bytes per second",
+        },{
+            .name = "throttling.bps-read",
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit read bytes per second",
+        },{
+            .name = "throttling.bps-write",
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit write bytes per second",
+        },{
+            .name = "throttling.iops-total-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "I/O operations burst",
+        },{
+            .name = "throttling.iops-read-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "I/O operations read burst",
+        },{
+            .name = "throttling.iops-write-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "I/O operations write burst",
+        },{
+            .name = "throttling.bps-total-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "total bytes burst",
+        },{
+            .name = "throttling.bps-read-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "total bytes read burst",
+        },{
+            .name = "throttling.bps-write-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "total bytes write burst",
+        },{
+            .name = "throttling.iops-total-max-length",
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the iops-total-max burst period, in seconds",
+        },{
+            .name = "throttling.iops-read-max-length",
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the iops-read-max burst period, in seconds",
+        },{
+            .name = "throttling.iops-write-max-length",
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the iops-write-max burst period, in seconds",
+        },{
+            .name = "throttling.bps-total-max-length",
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the bps-total-max burst period, in seconds",
+        },{
+            .name = "throttling.bps-read-max-length",
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the bps-read-max burst period, in seconds",
+        },{
+            .name = "throttling.bps-write-max-length",
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the bps-write-max burst period, in seconds",
+        },{
+            .name = "throttling.iops-size",
+            .type = QEMU_OPT_NUMBER,
+            .help = "when limiting by iops max size of an I/O in bytes",
         },
 
         { /*End of list */ }
diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c
new file mode 100644
index 0000000..f73d199
--- /dev/null
+++ b/fsdev/qemu-fsdev-throttle.c
@@ -0,0 +1,166 @@ 
+/*
+ * Fsdev Throttle
+ *
+ * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu-fsdev-throttle.h"
+
+static bool fsdev_throttle_check_for_wait(FsThrottle *fst, bool is_write)
+{
+   return  throttle_schedule_timer(&fst->ts, &fst->tt, is_write);
+}
+
+static void fsdev_throttle_schedule_next_request(FsThrottle *fst, bool is_write)
+{
+    bool must_wait;
+
+    if (qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
+        return;
+    }
+    must_wait = fsdev_throttle_check_for_wait(fst, is_write);
+    if (!must_wait) {
+        if (qemu_in_coroutine() &&
+            qemu_co_queue_next(&fst->throttled_reqs[is_write])) {
+            ;
+       } else {
+           int64_t now = qemu_clock_get_ns(fst->tt.clock_type);
+           timer_mod(fst->tt.timers[is_write], now + 1);
+       }
+   }
+}
+
+static void fsdev_throttle_timer_cb(FsThrottle *fst, bool is_write)
+{
+    bool empty_queue;
+    empty_queue = !qemu_co_enter_next(&fst->throttled_reqs[is_write]);
+    if (empty_queue) {
+        fsdev_throttle_schedule_next_request(fst, is_write);
+    }
+}
+
+static void fsdev_throttle_read_timer_cb(void *opaque)
+{
+    fsdev_throttle_timer_cb(opaque, false);
+}
+
+static void fsdev_throttle_write_timer_cb(void *opaque)
+{
+    fsdev_throttle_timer_cb(opaque, true);
+}
+
+static void fsdev_parse_io_limit_opts(QemuOpts *opts, FsThrottle *fst)
+{
+    fst->cfg.buckets[THROTTLE_BPS_TOTAL].avg =
+          qemu_opt_get_number(opts, "throttling.bps-total", 0);
+    fst->cfg.buckets[THROTTLE_BPS_READ].avg  =
+          qemu_opt_get_number(opts, "throttling.bps-read", 0);
+    fst->cfg.buckets[THROTTLE_BPS_WRITE].avg =
+          qemu_opt_get_number(opts, "throttling.bps-write", 0);
+    fst->cfg.buckets[THROTTLE_OPS_TOTAL].avg =
+          qemu_opt_get_number(opts, "throttling.iops-total", 0);
+    fst->cfg.buckets[THROTTLE_OPS_READ].avg =
+          qemu_opt_get_number(opts, "throttling.iops-read", 0);
+    fst->cfg.buckets[THROTTLE_OPS_WRITE].avg =
+          qemu_opt_get_number(opts, "throttling.iops-write", 0);
+
+    fst->cfg.buckets[THROTTLE_BPS_TOTAL].max =
+          qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
+    fst->cfg.buckets[THROTTLE_BPS_READ].max  =
+          qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
+    fst->cfg.buckets[THROTTLE_BPS_WRITE].max =
+          qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
+    fst->cfg.buckets[THROTTLE_OPS_TOTAL].max =
+          qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
+    fst->cfg.buckets[THROTTLE_OPS_READ].max =
+          qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
+    fst->cfg.buckets[THROTTLE_OPS_WRITE].max =
+          qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
+
+    fst->cfg.buckets[THROTTLE_BPS_TOTAL].burst_length =
+          qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
+    fst->cfg.buckets[THROTTLE_BPS_READ].burst_length  =
+          qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
+    fst->cfg.buckets[THROTTLE_BPS_WRITE].burst_length =
+          qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
+    fst->cfg.buckets[THROTTLE_OPS_TOTAL].burst_length =
+          qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
+    fst->cfg.buckets[THROTTLE_OPS_READ].burst_length =
+          qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
+    fst->cfg.buckets[THROTTLE_OPS_WRITE].burst_length =
+          qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
+    fst->cfg.op_size =
+          qemu_opt_get_number(opts, "throttling.iops-size", 0);
+}
+
+int fsdev_throttle_configure_iolimits(QemuOpts *opts, FsThrottle *fst)
+{
+    Error *err = NULL;
+
+    /* Parse and set populate the cfg structure */
+    fsdev_parse_io_limit_opts(opts, fst);
+
+    if (!throttle_is_valid(&fst->cfg, &err)) {
+        error_reportf_err(err, "Throttle configuration is not valid: ");
+        return -1;
+    }
+    if (throttle_enabled(&fst->cfg)) {
+        g_assert((fst->aioctx = qemu_get_aio_context()));
+        throttle_init(&fst->ts);
+        throttle_timers_init(&fst->tt,
+                             fst->aioctx,
+                             QEMU_CLOCK_REALTIME,
+                             fsdev_throttle_read_timer_cb,
+                             fsdev_throttle_write_timer_cb,
+                             fst);
+        throttle_config(&fst->ts, &fst->tt, &fst->cfg);
+        qemu_co_queue_init(&fst->throttled_reqs[0]);
+        qemu_co_queue_init(&fst->throttled_reqs[1]);
+   }
+   return 0;
+}
+
+static int get_num_bytes(struct iovec *iov, int iovcnt)
+{
+    int index = 0;
+    int bytes = 0;
+
+    while (index < iovcnt) {
+        bytes += iov[index].iov_len;
+        index++;
+    }
+    return bytes;
+}
+
+void fsdev_throttle_request(FsThrottle *fst, bool is_write,
+                            struct iovec *iov, int iovcnt)
+{
+    int bytes;
+
+    if (throttle_enabled(&fst->cfg)) {
+        bytes = get_num_bytes(iov, iovcnt);
+        bool must_wait = fsdev_throttle_check_for_wait(fst, is_write);
+        if (must_wait || !qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
+            qemu_co_queue_wait(&fst->throttled_reqs[is_write]);
+       }
+       throttle_account(&fst->ts, is_write, bytes);
+
+       fsdev_throttle_schedule_next_request(fst, is_write);
+    }
+}
+
+void fsdev_throttle_cleanup(FsThrottle *fst)
+{
+    throttle_timers_destroy(&fst->tt);
+}
diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h
new file mode 100644
index 0000000..0535df0
--- /dev/null
+++ b/fsdev/qemu-fsdev-throttle.h
@@ -0,0 +1,37 @@ 
+/*
+ * Fsdev Throttle
+ *
+ * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ */
+
+#ifndef _FSDEV_THROTTLE_H
+#define _FSDEV_THROTTLE_H
+
+#include "block/aio.h"
+#include "qemu/main-loop.h"
+#include "qemu/coroutine.h"
+#include "qemu/throttle.h"
+
+typedef struct FsThrottle {
+    ThrottleState ts;
+    ThrottleTimers tt;
+    AioContext   *aioctx;
+    ThrottleConfig cfg;
+    CoQueue      throttled_reqs[2];
+} FsThrottle;
+
+int fsdev_throttle_configure_iolimits(QemuOpts *, FsThrottle *);
+
+void fsdev_throttle_request(FsThrottle *fst, bool is_write,
+                            struct iovec *iov, int iovcnt);
+
+void fsdev_throttle_cleanup(FsThrottle *);
+#endif /* _FSDEV_THROTTLE_H */
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 3f271fc..54459f2 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -436,8 +436,8 @@  static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
                              const struct iovec *iov,
                              int iovcnt, off_t offset)
 {
-    ssize_t ret
-;
+    ssize_t ret;
+
 #ifdef CONFIG_PREADV
     ret = pwritev(fs->fd, iov, iovcnt, offset);
 #else
@@ -1240,6 +1240,11 @@  static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
         error_report("fsdev: No path specified");
         return -1;
     }
+
+    if (fsdev_throttle_configure_iolimits(opts, &fse->fst) < 0) {
+        return -1;
+    }
+
     fse->path = g_strdup(path);
 
     return 0;
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index dfe293d..ccf6d0c 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -3490,6 +3490,9 @@  int v9fs_device_realize_common(V9fsState *s, Error **errp)
         error_setg(errp, "share path %s is not a directory", fse->path);
         goto out;
     }
+
+    s->ctx.fst = &fse->fst;
+
     v9fs_path_free(&path);
 
     rc = 0;
@@ -3504,6 +3507,9 @@  out:
 
 void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
 {
+    if (throttle_enabled(&s->ctx.fst->cfg)) {
+        fsdev_throttle_cleanup(s->ctx.fst);
+    }
     g_free(s->ctx.fs_root);
     g_free(s->tag);
 }
diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
index 10343c0..9fb7bca 100644
--- a/hw/9pfs/cofile.c
+++ b/hw/9pfs/cofile.c
@@ -16,6 +16,7 @@ 
 #include "qemu/thread.h"
 #include "qemu/coroutine.h"
 #include "coth.h"
+#include "fsdev/qemu-fsdev-throttle.h"
 
 int v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t st_mode,
                    V9fsStatDotl *v9stat)
@@ -245,6 +246,8 @@  int v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp,
     if (v9fs_request_cancelled(pdu)) {
         return -EINTR;
     }
+
+    fsdev_throttle_request(s->ctx.fst, true, iov, iovcnt);
     v9fs_co_run_in_worker(
         {
             err = s->ops->pwritev(&s->ctx, &fidp->fs, iov, iovcnt, offset);
@@ -264,6 +267,8 @@  int v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp,
     if (v9fs_request_cancelled(pdu)) {
         return -EINTR;
     }
+
+    fsdev_throttle_request(s->ctx.fst, false, iov, iovcnt);
     v9fs_co_run_in_worker(
         {
             err = s->ops->preadv(&s->ctx, &fidp->fs, iov, iovcnt, offset);