diff mbox

Simple performance logging and network limiting based on trace option

Message ID 1414510422-8277-1-git-send-email-rehs@gmx.de
State New
Headers show

Commit Message

Harald Schieche Oct. 28, 2014, 3:33 p.m. UTC

Comments

Stefan Hajnoczi Oct. 29, 2014, 3:09 p.m. UTC | #1
On Tue, Oct 28, 2014 at 04:33:42PM +0100, Harald Schieche wrote:

Missing commit description:

What problem are you trying to solve?

The "Signed-off-by" goes here.

> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index 475cf74..3c5cc71 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -1031,6 +1031,27 @@ static int aio_worker(void *arg)
>      return ret;
>  }
>  
> +static void log_guest_storage_performance(void)
> +{
> +    /*
> +     * Performance logging isn't specified yet.
> +     * Therefore we're using existing tracing.
> +     */
> +    static int64_t logged_clock;
> +    static int64_t counter;

There can be multiple threads, static variables will not work.

> +    int64_t clock = get_clock();
> +
> +    counter++;
> +    if (clock - logged_clock >= 1000000000LL) {

You are trying to identify calls to log_guest_storage_performance() that
are more than 1 second apart?  I'm not sure what you're trying to
measure.

It is simplest to have unconditional trace events and calculate
latencies during trace file analysis.  That way no arbitrary constants
like 1 second are hard-coded into QEMU.

> diff --git a/net/queue.c b/net/queue.c
> index f948318..2b0fef7 100644
> --- a/net/queue.c
> +++ b/net/queue.c
> @@ -23,7 +23,9 @@
>  
>  #include "net/queue.h"
>  #include "qemu/queue.h"
> +#include "qemu/timer.h"
>  #include "net/net.h"
> +#include "trace.h"
>  
>  /* The delivery handler may only return zero if it will call
>   * qemu_net_queue_flush() when it determines that it is once again able
> @@ -58,6 +60,15 @@ struct NetQueue {
>      unsigned delivering : 1;
>  };
>  
> +static int64_t bandwidth_limit;     /* maximum number of bits per second */

Throttling should be per-device, not global.

> +
> +void qemu_net_set_bandwidth_limit(int64_t limit)
> +{
> +    bandwidth_limit = limit;
> +    trace_qemu_net_set_bandwidth_limit(limit);
> +}
> +
> +
>  NetQueue *qemu_new_net_queue(void *opaque)
>  {
>      NetQueue *queue;
> @@ -175,6 +186,48 @@ static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue,
>      return ret;
>  }
>  
> +static int64_t limit_network_performance(int64_t start_clock,
> +                                         int64_t bytes)
> +{
> +    int64_t clock = get_clock();
> +    int64_t sleep_usecs = 0;
> +    if (bandwidth_limit > 0) {
> +        sleep_usecs = (bytes * 8 * 1000000LL) / bandwidth_limit -
> +                      (clock - start_clock) / 1000LL;
> +    }
> +    if (sleep_usecs > 0) {
> +        usleep(sleep_usecs);

This does more than limit the network performance, it can also freeze
the guest.

QEMU is event-driven.  The event loop thread is not allowed to block or
sleep - otherwise the vcpu threads will block when they try to acquire
the QEMU global mutex.
Harald Schieche Oct. 30, 2014, 2:05 p.m. UTC | #2
> Missing commit description:
> 
> What problem are you trying to solve?
> 

I want to log the storage (iops per second) and
network speed (packets and bandwidth per second)

I want to limit the network traffic to a specific bandwidth.

> 
> The "Signed-off-by" goes here.
> 
> > diff --git a/block/raw-posix.c b/block/raw-posix.c
> > index 475cf74..3c5cc71 100644
> > --- a/block/raw-posix.c
> > +++ b/block/raw-posix.c
> > @@ -1031,6 +1031,27 @@ static int aio_worker(void *arg)
> >      return ret;
> >  }
> >  
> > +static void log_guest_storage_performance(void)
> > +{
> > +    /*
> > +     * Performance logging isn't specified yet.
> > +     * Therefore we're using existing tracing.
> > +     */
> > +    static int64_t logged_clock;
> > +    static int64_t counter;
> 
> There can be multiple threads, static variables will not work.

ok, I have to fix it.

> 
> > +    int64_t clock = get_clock();
> > +
> > +    counter++;
> > +    if (clock - logged_clock >= 1000000000LL) {
> 
> You are trying to identify calls to log_guest_storage_performance() that
> are more than 1 second apart?  I'm not sure what you're trying to
> measure.

I want to measure calls per second and I want to log when i/o is active

> 
> It is simplest to have unconditional trace events and calculate
> latencies during trace file analysis.  That way no arbitrary constants
> like 1 second are hard-coded into QEMU.

We already have an unconditional trace event (paio_submit) but maybe there
are too many calls of it.

> 
> > diff --git a/net/queue.c b/net/queue.c
> > index f948318..2b0fef7 100644
> > --- a/net/queue.c
> > +++ b/net/queue.c
> > @@ -23,7 +23,9 @@
> >  
> >  #include "net/queue.h"
> >  #include "qemu/queue.h"
> > +#include "qemu/timer.h"
> >  #include "net/net.h"
> > +#include "trace.h"
> >  
> >  /* The delivery handler may only return zero if it will call
> >   * qemu_net_queue_flush() when it determines that it is once again able
> > @@ -58,6 +60,15 @@ struct NetQueue {
> >      unsigned delivering : 1;
> >  };
> >  
> > +static int64_t bandwidth_limit;     /* maximum number of bits per second */
> 
> Throttling should be per-device, not global.

Maybe this would be better. But this patch should be most simple.

> 
> > +
> > +void qemu_net_set_bandwidth_limit(int64_t limit)
> > +{
> > +    bandwidth_limit = limit;
> > +    trace_qemu_net_set_bandwidth_limit(limit);
> > +}
> > +
> > +
> >  NetQueue *qemu_new_net_queue(void *opaque)
> >  {
> >      NetQueue *queue;
> > @@ -175,6 +186,48 @@ static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue,
> >      return ret;
> >  }
> >  
> > +static int64_t limit_network_performance(int64_t start_clock,
> > +                                         int64_t bytes)
> > +{
> > +    int64_t clock = get_clock();
> > +    int64_t sleep_usecs = 0;
> > +    if (bandwidth_limit > 0) {
> > +        sleep_usecs = (bytes * 8 * 1000000LL) / bandwidth_limit -
> > +                      (clock - start_clock) / 1000LL;
> > +    }
> > +    if (sleep_usecs > 0) {
> > +        usleep(sleep_usecs);
> 
> This does more than limit the network performance, it can also freeze
> the guest.
> 
> QEMU is event-driven.  The event loop thread is not allowed to block or
> sleep - otherwise the vcpu threads will block when they try to acquire
> the QEMU global mutex.
> 

Yes, it freezes the guest. That's not fine, but simple.
Stefan Hajnoczi Oct. 31, 2014, 11:13 a.m. UTC | #3
On Thu, Oct 30, 2014 at 03:05:11PM +0100, harald Schieche wrote:
> > Missing commit description:
> > 
> > What problem are you trying to solve?
> > 
> 
> I want to log the storage (iops per second) and
> network speed (packets and bandwidth per second)

QEMU offers the query-blockstats QMP command to poll I/O statistics for
block devices.

Nowadays a lot of KVM users bypass the QEMU network subsystem and use
the vhost-net Linux host kernel module instead.  That is the
highest-performance and most actively developed networking path.  Are
you sure you don't want to use vhost-net?

> I want to limit the network traffic to a specific bandwidth.

You can use the host kernel's firewall or traffic shaping features to do
that when using a tap device (most common production configuration).
For example, libvirt offers this feature and uses tc under the hood.

> > It is simplest to have unconditional trace events and calculate
> > latencies during trace file analysis.  That way no arbitrary constants
> > like 1 second are hard-coded into QEMU.
> 
> We already have an unconditional trace event (paio_submit) but maybe there
> are too many calls of it.

If you add the BlockDriverState *bs pointer to the paio_submit call,
then you can distinguish between drives.

However, tracing is not mean as a stable interface for building other
features.  Trace events can change and are mainly used for interactive
or ad-hoc instrumentation.

If you build a tool on top of trace events, be prepared to actively
maintain it as the set of trace events evolves over time.  It's not a
stable ABI.

> > > diff --git a/net/queue.c b/net/queue.c
> > > index f948318..2b0fef7 100644
> > > --- a/net/queue.c
> > > +++ b/net/queue.c
> > > @@ -23,7 +23,9 @@
> > >  
> > >  #include "net/queue.h"
> > >  #include "qemu/queue.h"
> > > +#include "qemu/timer.h"
> > >  #include "net/net.h"
> > > +#include "trace.h"
> > >  
> > >  /* The delivery handler may only return zero if it will call
> > >   * qemu_net_queue_flush() when it determines that it is once again able
> > > @@ -58,6 +60,15 @@ struct NetQueue {
> > >      unsigned delivering : 1;
> > >  };
> > >  
> > > +static int64_t bandwidth_limit;     /* maximum number of bits per second */
> > 
> > Throttling should be per-device, not global.
> 
> Maybe this would be better. But this patch should be most simple.

Everything in the network subsystem is per-NetClientState.  It doesn't
make sense to introduce global state just because it's easier.

> > > +static int64_t limit_network_performance(int64_t start_clock,
> > > +                                         int64_t bytes)
> > > +{
> > > +    int64_t clock = get_clock();
> > > +    int64_t sleep_usecs = 0;
> > > +    if (bandwidth_limit > 0) {
> > > +        sleep_usecs = (bytes * 8 * 1000000LL) / bandwidth_limit -
> > > +                      (clock - start_clock) / 1000LL;
> > > +    }
> > > +    if (sleep_usecs > 0) {
> > > +        usleep(sleep_usecs);
> > 
> > This does more than limit the network performance, it can also freeze
> > the guest.
> > 
> > QEMU is event-driven.  The event loop thread is not allowed to block or
> > sleep - otherwise the vcpu threads will block when they try to acquire
> > the QEMU global mutex.
> > 
> 
> Yes, it freezes the guest. That's not fine, but simple.

I won't merge this approach.
diff mbox

Patch

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 475cf74..3c5cc71 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1031,6 +1031,27 @@  static int aio_worker(void *arg)
     return ret;
 }
 
+static void log_guest_storage_performance(void)
+{
+    /*
+     * Performance logging isn't specified yet.
+     * Therefore we're using existing tracing.
+     */
+    static int64_t logged_clock;
+    static int64_t counter;
+    int64_t clock = get_clock();
+
+    counter++;
+    if (clock - logged_clock >= 1000000000LL) {
+        if (logged_clock > 0) { /* don't log first event */
+            trace_log_guest_storage_performance
+                (counter, (clock - logged_clock) / 1000000000LL);
+        }
+        counter = 0;
+        logged_clock = clock;
+    }
+}
+
 static int paio_submit_co(BlockDriverState *bs, int fd,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         int type)
@@ -1051,6 +1072,7 @@  static int paio_submit_co(BlockDriverState *bs, int fd,
         assert(qiov->size == acb->aio_nbytes);
     }
 
+    log_guest_storage_performance();
     trace_paio_submit_co(sector_num, nb_sectors, type);
     pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
     return thread_pool_submit_co(pool, aio_worker, acb);
@@ -1076,6 +1098,7 @@  static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
         assert(qiov->size == acb->aio_nbytes);
     }
 
+    log_guest_storage_performance();
     trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
     pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
     return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 7b58881..8cb2743 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -138,6 +138,27 @@  static int aio_worker(void *arg)
     return ret;
 }
 
+static void log_guest_storage_performance(void)
+{
+    /*
+     * Performance logging isn't specified yet.
+     * Therefore we're using existing tracing.
+     */
+    static int64_t logged_clock;
+    static int64_t counter;
+    int64_t clock = get_clock();
+
+    counter++;
+    if (clock - logged_clock >= 1000000000LL) {
+        if (logged_clock > 0) { /* don't log first event */
+            trace_log_guest_storage_performance
+                (counter, (clock - logged_clock) / 1000000000LL);
+        }
+        counter = 0;
+        logged_clock = clock;
+    }
+}
+
 static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockCompletionFunc *cb, void *opaque, int type)
@@ -156,6 +177,7 @@  static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
     acb->aio_nbytes = nb_sectors * 512;
     acb->aio_offset = sector_num * 512;
 
+    log_guest_storage_performance();
     trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
     pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
     return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
diff --git a/include/net/queue.h b/include/net/queue.h
index fc02b33..d8ea589 100644
--- a/include/net/queue.h
+++ b/include/net/queue.h
@@ -34,6 +34,8 @@  typedef void (NetPacketSent) (NetClientState *sender, ssize_t ret);
 #define QEMU_NET_PACKET_FLAG_NONE  0
 #define QEMU_NET_PACKET_FLAG_RAW  (1<<0)
 
+void qemu_net_set_bandwidth_limit(int64_t limit);
+
 NetQueue *qemu_new_net_queue(void *opaque);
 
 void qemu_del_net_queue(NetQueue *queue);
diff --git a/net/queue.c b/net/queue.c
index f948318..2b0fef7 100644
--- a/net/queue.c
+++ b/net/queue.c
@@ -23,7 +23,9 @@ 
 
 #include "net/queue.h"
 #include "qemu/queue.h"
+#include "qemu/timer.h"
 #include "net/net.h"
+#include "trace.h"
 
 /* The delivery handler may only return zero if it will call
  * qemu_net_queue_flush() when it determines that it is once again able
@@ -58,6 +60,15 @@  struct NetQueue {
     unsigned delivering : 1;
 };
 
+static int64_t bandwidth_limit;     /* maximum number of bits per second */
+
+void qemu_net_set_bandwidth_limit(int64_t limit)
+{
+    bandwidth_limit = limit;
+    trace_qemu_net_set_bandwidth_limit(limit);
+}
+
+
 NetQueue *qemu_new_net_queue(void *opaque)
 {
     NetQueue *queue;
@@ -175,6 +186,48 @@  static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue,
     return ret;
 }
 
+static int64_t limit_network_performance(int64_t start_clock,
+                                         int64_t bytes)
+{
+    int64_t clock = get_clock();
+    int64_t sleep_usecs = 0;
+    if (bandwidth_limit > 0) {
+        sleep_usecs = (bytes * 8 * 1000000LL) / bandwidth_limit -
+                      (clock - start_clock) / 1000LL;
+    }
+    if (sleep_usecs > 0) {
+        usleep(sleep_usecs);
+        clock = get_clock();
+    }
+
+    return clock;
+}
+
+static void log_and_limit_network_performance(size_t size)
+{
+    /*
+     * Performance logging isn't specified yet.
+     * Therefore we're using existing tracing.
+     */
+    static int64_t logged_clock;
+    static int64_t packets;
+    static int64_t bytes;
+    int64_t clock = 0;
+
+    packets++;
+    bytes = bytes + size;
+    clock = limit_network_performance(logged_clock, bytes);
+    if (clock - logged_clock >= 1000000000LL) {
+        if (logged_clock > 0) { /* don't log first event */
+            trace_log_network_performance
+                (packets, bytes*8, (clock - logged_clock) / 1000000000LL);
+        }
+        packets = 0;
+        bytes = 0;
+        logged_clock = clock;
+    }
+}
+
 ssize_t qemu_net_queue_send(NetQueue *queue,
                             NetClientState *sender,
                             unsigned flags,
@@ -184,6 +237,7 @@  ssize_t qemu_net_queue_send(NetQueue *queue,
 {
     ssize_t ret;
 
+    log_and_limit_network_performance(size) ;
     if (queue->delivering || !qemu_can_send_packet(sender)) {
         qemu_net_queue_append(queue, sender, flags, data, size, sent_cb);
         return 0;
diff --git a/qemu-options.hx b/qemu-options.hx
index 22cf3b9..35aee69 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1916,6 +1916,19 @@  override the default configuration (@option{-net nic -net user}) which
 is activated if no @option{-net} options are provided.
 ETEXI
 
+DEF("bandwidth", HAS_ARG, QEMU_OPTION_bandwidth,
+	"-bandwidth [limit=]n\n"
+	"                set the maximum bandwidth[bits per second] to 'n' (default=0, no limit)\n",
+	    QEMU_ARCH_ALL)
+STEXI
+@item -bandwidth [limit=]@var{n}]
+@findex -bandwidth
+Limit the network bandwith (bits per second).
+If the limit is reached, Qemu will sleep.
+0, the default means no limit.
+This option is active if trace-event "log_network_performance" is active
+ETEXI
+
 STEXI
 @end table
 ETEXI
diff --git a/trace-events b/trace-events
index 6c3a400..04b2d36 100644
--- a/trace-events
+++ b/trace-events
@@ -134,6 +134,7 @@  thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
 # block/raw-posix.c
 paio_submit_co(int64_t sector_num, int nb_sectors, int type) "sector_num %"PRId64" nb_sectors %d type %d"
 paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
+log_guest_storage_performance(int64_t counter, int64_t seconds) "counter %"PRId64" seconds %"PRId64"
 
 # ioport.c
 cpu_in(unsigned int addr, unsigned int val) "addr %#x value %u"
@@ -1378,3 +1379,7 @@  i8257_unregistered_dma(int nchan, int dma_pos, int dma_len) "unregistered DMA ch
 cpu_set_state(int cpu_index, uint8_t state) "setting cpu %d state to %" PRIu8
 cpu_halt(int cpu_index) "halting cpu %d"
 cpu_unhalt(int cpu_index) "unhalting cpu %d"
+
+# net/queue.c
+qemu_net_set_bandwidth_limit(int64_t limit) "bits per second %"PRId64"
+log_network_performance(int64_t packets, int64_t bits, int64_t seconds) "packets %"PRId64" "bits %"PRId64" seconds %"PRId64"
diff --git a/vl.c b/vl.c
index 2f81384..02a91e1 100644
--- a/vl.c
+++ b/vl.c
@@ -1309,6 +1309,30 @@  static void smp_parse(QemuOpts *opts)
 
 }
 
+static QemuOptsList qemu_bandwidth_opts = {
+    .name = "bandwidth-opts",
+    .implied_opt_name = "limit",
+    .merge_lists = true,
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_bandwidth_opts.head),
+    .desc = {
+        {
+            .name = "limit",
+            .type = QEMU_OPT_NUMBER,
+        },
+        { /*End of list */ }
+    },
+};
+
+static void bandwidth_parse(QemuOpts *opts)
+{
+    if (opts) {
+
+        qemu_net_set_bandwidth_limit(qemu_opt_get_number(opts, "limit", 0));
+
+    }
+
+}
+
 static void realtime_init(void)
 {
     if (enable_mlock) {
@@ -2758,6 +2782,7 @@  int main(int argc, char **argv, char **envp)
     qemu_add_opts(&qemu_machine_opts);
     qemu_add_opts(&qemu_mem_opts);
     qemu_add_opts(&qemu_smp_opts);
+    qemu_add_opts(&qemu_bandwidth_opts);
     qemu_add_opts(&qemu_boot_opts);
     qemu_add_opts(&qemu_sandbox_opts);
     qemu_add_opts(&qemu_add_fd_opts);
@@ -3517,6 +3542,12 @@  int main(int argc, char **argv, char **envp)
                     exit(1);
                 }
                 break;
+            case QEMU_OPTION_bandwidth:
+                if (!qemu_opts_parse(qemu_find_opts("bandwidth-opts"),
+                                     optarg, 1)) {
+                    exit(1);
+                }
+                break;
 	    case QEMU_OPTION_vnc:
 #ifdef CONFIG_VNC
                 display_remote++;
@@ -3862,6 +3893,8 @@  int main(int argc, char **argv, char **envp)
         exit(1);
     }
 
+    bandwidth_parse(qemu_opts_find(qemu_find_opts("bandwidth-opts"), NULL));
+
     /*
      * Get the default machine options from the machine if it is not already
      * specified either by the configuration file or by the command line.