Patchwork [For,stable-0.12] virtio: Add memory statistics reporting to the balloon driver (V5)

login
register
mail settings
Submitter Adam Litke
Date Dec. 9, 2009, 6:42 p.m.
Message ID <1260384154.3429.6.camel@aglitke>
Download mbox | patch
Permalink /patch/40756/
State New
Headers show

Comments

Adam Litke - Dec. 9, 2009, 6:42 p.m.
Reverted the vmstate changes since they have not made it into stable-0.12 yet.

This iteration addresses all of the comments from the last round.  Thanks to
everyone for their careful reviews and helpful comments.  The most significant
change in this version is my use of the QObject API, so a concentrated review
in that area would be most appreciated.  I am hoping to target 0.12.0 with this
patch.  Please let me know if that remains a possibility.  Thanks.

Changes since V4:
 - Virtio spec updated: http://ozlabs.org/~rusty/virtio-spec/virtio-spec-0.8.2.pdf
 - Guest-side Linux implementation applied by Rusty
 - Start using the QObject infrastructure
 - All endian conversions done in the host
 - Report stats that reference a quantity of memory in bytes

Changes since V3:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat

Changes since V2:
 - Use a virtqueue for communication instead of the device config space

Changes since V1:
 - In the monitor, print all stats on one line with less abbreviated names
 - Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch implements the qemu side of the communication channel.  I will post
the kernel driver modifications in reply to this message.

Signed-off-by: Adam Litke <agl@us.ibm.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: qemu-devel@nongnu.org

Patch

diff --git a/balloon.h b/balloon.h
index 60b4a5d..23bbffe 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,12 +16,12 @@ 
 
 #include "cpu-defs.h"
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+typedef QObject *(QEMUBalloonEvent)(void *opaque, ram_addr_t target);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
 void qemu_balloon(ram_addr_t target);
 
-ram_addr_t qemu_balloon_status(void);
+QObject *qemu_balloon_status(void);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index cfd3b41..f3bc138 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -19,6 +19,10 @@ 
 #include "balloon.h"
 #include "virtio-balloon.h"
 #include "kvm.h"
+#include "monitor.h"
+#include "qlist.h"
+#include "qint.h"
+#include "qstring.h"
 
 #if defined(__linux__)
 #include <sys/mman.h>
@@ -27,9 +31,13 @@ 
 typedef struct VirtIOBalloon
 {
     VirtIODevice vdev;
-    VirtQueue *ivq, *dvq;
+    VirtQueue *ivq, *dvq, *svq;
     uint32_t num_pages;
     uint32_t actual;
+    uint64_t stats[VIRTIO_BALLOON_S_NR];
+    VirtQueueElement stats_vq_elem;
+    size_t stats_vq_offset;
+    uint8_t stats_requested;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -46,6 +54,35 @@  static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+    int i;
+    for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
+}
+
+static void stat_put(QList *list, const char *label, uint64_t val)
+{
+    if (val != -1) {
+        qlist_append(list, qstring_from_str(label));
+        qlist_append(list, qint_from_int(val));
+    }
+}
+
+static QObject *get_stats_qobject(VirtIOBalloon *dev)
+{
+    QList *list = qlist_new();
+    uint32_t actual = ram_size - (dev->actual << VIRTIO_BALLOON_PFN_SHIFT);
+
+    stat_put(list, "actual", (int)actual >> 20);
+    stat_put(list, "mem_swapped_in", dev->stats[VIRTIO_BALLOON_S_SWAP_IN]);
+    stat_put(list, "mem_swapped_out", dev->stats[VIRTIO_BALLOON_S_SWAP_OUT]);
+    stat_put(list, "major_page_faults", dev->stats[VIRTIO_BALLOON_S_MAJFLT]);
+    stat_put(list, "minor_page_faults", dev->stats[VIRTIO_BALLOON_S_MINFLT]);
+    stat_put(list, "free_mem", dev->stats[VIRTIO_BALLOON_S_MEMFREE]);
+    stat_put(list, "total_mem", dev->stats[VIRTIO_BALLOON_S_MEMTOT]);
+    return QOBJECT(list);
+}
+
 /* FIXME: once we do a virtio refactoring, this will get subsumed into common
  * code */
 static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
@@ -104,6 +141,36 @@  static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
     }
 }
 
+static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev);
+    VirtQueueElement *elem = &s->stats_vq_elem;
+    VirtIOBalloonStat stat;
+    size_t offset = 0;
+
+    if (!virtqueue_pop(vq, elem))
+        return;
+
+    while (memcpy_from_iovector(&stat, offset, sizeof(stat), elem->out_sg,
+                                elem->out_num) == sizeof(stat)) {
+        uint16_t tag = tswap16(stat.tag);
+        uint64_t val = tswap64(stat.val);
+
+        offset += sizeof(stat);
+        if (tag < VIRTIO_BALLOON_S_NR)
+            s->stats[tag] = val;
+    }
+    s->stats_vq_offset = offset;
+
+    if (s->stats_requested) {
+        QObject *stats = get_stats_qobject(s);
+        monitor_print_balloon(cur_mon, stats);
+        qobject_decref(stats);
+        monitor_resume(cur_mon);
+        s->stats_requested = 0;
+    }
+}
+
 static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
 {
     VirtIOBalloon *dev = to_virtio_balloon(vdev);
@@ -126,12 +193,22 @@  static void virtio_balloon_set_config(VirtIODevice *vdev,
 
 static uint32_t virtio_balloon_get_features(VirtIODevice *vdev)
 {
-    return 0;
+    return 1 << VIRTIO_BALLOON_F_STATS_VQ;
+}
+
+static void request_stats(VirtIOBalloon *vb)
+{
+    vb->stats_requested = 1;
+    reset_stats(vb);
+    monitor_suspend(cur_mon);
+    virtqueue_push(vb->svq, &vb->stats_vq_elem, vb->stats_vq_offset);
+    virtio_notify(&vb->vdev, vb->svq);
 }
 
-static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
+static QObject *virtio_balloon_to_target(void *opaque, ram_addr_t target)
 {
     VirtIOBalloon *dev = opaque;
+    QObject *ret = NULL;
 
     if (target > ram_size)
         target = ram_size;
@@ -139,9 +216,15 @@  static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
     if (target) {
         dev->num_pages = (ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
         virtio_notify_config(&dev->vdev);
+    } else if (dev->vdev.features & (1 << VIRTIO_BALLOON_F_STATS_VQ)) {
+        request_stats(dev);
+        ret = QOBJECT(qlist_new());
+    } else {
+        reset_stats(dev);
+        ret = get_stats_qobject(dev);
     }
 
-    return ram_size - (dev->actual << VIRTIO_BALLOON_PFN_SHIFT);
+    return ret;
 }
 
 static void virtio_balloon_save(QEMUFile *f, void *opaque)
@@ -152,6 +235,9 @@  static void virtio_balloon_save(QEMUFile *f, void *opaque)
 
     qemu_put_be32(f, s->num_pages);
     qemu_put_be32(f, s->actual);
+    qemu_put_buffer(f, (uint8_t *)&s->stats_vq_elem, sizeof(VirtQueueElement));
+    qemu_put_buffer(f, (uint8_t *)&s->stats_vq_offset, sizeof(size_t));
+    qemu_put_byte(f, s->stats_requested);
 }
 
 static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
@@ -165,6 +251,9 @@  static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
 
     s->num_pages = qemu_get_be32(f);
     s->actual = qemu_get_be32(f);
+    qemu_get_buffer(f, (uint8_t *)&s->stats_vq_elem, sizeof(VirtQueueElement));
+    qemu_get_buffer(f, (uint8_t *)&s->stats_vq_offset, sizeof(size_t));
+    s->stats_requested = qemu_get_byte(f);
 
     return 0;
 }
@@ -183,6 +272,7 @@  VirtIODevice *virtio_balloon_init(DeviceState *dev)
 
     s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
     s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
+    s->svq = virtio_add_queue(&s->vdev, 128, virtio_balloon_receive_stats);
 
     qemu_add_balloon_handler(virtio_balloon_to_target, s);
 
diff --git a/hw/virtio-balloon.h b/hw/virtio-balloon.h
index 9a0d119..e20cf6b 100644
--- a/hw/virtio-balloon.h
+++ b/hw/virtio-balloon.h
@@ -25,6 +25,7 @@ 
 
 /* The feature bitmap for virtio balloon */
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
+#define VIRTIO_BALLOON_F_STATS_VQ 1       /* Memory stats virtqueue */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -37,4 +38,18 @@  struct virtio_balloon_config
     uint32_t actual;
 };
 
+/* Memory Statistics */
+#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
+#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped out */
+#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
+#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
+#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free memory */
+#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
+#define VIRTIO_BALLOON_S_NR       6
+
+typedef struct VirtIOBalloonStat {
+    uint16_t tag;
+    uint64_t val;
+} __attribute__((packed)) VirtIOBalloonStat;
+
 #endif
diff --git a/monitor.c b/monitor.c
index a38a103..20d3681 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1917,10 +1917,26 @@  static void do_balloon(Monitor *mon, const QDict *qdict, QObject **ret_data)
     qemu_balloon(target << 20);
 }
 
-static void monitor_print_balloon(Monitor *mon, const QObject *data)
+void monitor_print_balloon(Monitor *mon, const QObject *data)
 {
-    monitor_printf(mon, "balloon: actual=%d\n",
-                                     (int)qint_get_int(qobject_to_qint(data)));
+    QList *list = qobject_to_qlist(data);
+    QString *label;
+    QInt *val;
+
+    if (qlist_empty(list))
+        return;
+
+    label = qobject_to_qstring(qlist_pop(list));
+    val = qobject_to_qint(qlist_pop(list));
+    monitor_printf(mon, "balloon: actual=%d", (int)qint_get_int(val));
+
+    while (!qlist_empty(list)) {
+        label = qobject_to_qstring(qlist_pop(list));
+        val = qobject_to_qint(qlist_pop(list));
+        monitor_printf(mon, ",%s=%lu", qstring_get_str(label),
+                       (uint64_t)qint_get_int(val));
+    } 
+    monitor_printf(mon, "\n");
 }
 
 /**
@@ -1928,15 +1944,11 @@  static void monitor_print_balloon(Monitor *mon, const QObject *data)
  */
 static void do_info_balloon(Monitor *mon, QObject **ret_data)
 {
-    ram_addr_t actual;
-
-    actual = qemu_balloon_status();
     if (kvm_enabled() && !kvm_has_sync_mmu())
         qemu_error_new(QERR_KVM_MISSING_CAP, "synchronous MMU", "balloon");
-    else if (actual == 0)
+    *ret_data = qemu_balloon_status();
+    if (*ret_data == NULL)
         qemu_error_new(QERR_DEVICE_NOT_ACTIVE, "balloon");
-    else
-        *ret_data = QOBJECT(qint_from_int((int)(actual >> 20)));
 }
 
 static qemu_acl *find_acl(Monitor *mon, const char *name)
diff --git a/monitor.h b/monitor.h
index 38cc223..bd59f7d 100644
--- a/monitor.h
+++ b/monitor.h
@@ -33,6 +33,7 @@  void monitor_resume(Monitor *mon);
 void monitor_read_bdrv_key_start(Monitor *mon, BlockDriverState *bs,
                                  BlockDriverCompletionFunc *completion_cb,
                                  void *opaque);
+void monitor_print_balloon(Monitor *mon, const QObject *data);
 
 int monitor_get_fd(Monitor *mon, const char *fdname);
 
diff --git a/vl.c b/vl.c
index 09a0ec5..f379f44 100644
--- a/vl.c
+++ b/vl.c
@@ -331,11 +331,11 @@  void qemu_balloon(ram_addr_t target)
         qemu_balloon_event(qemu_balloon_event_opaque, target);
 }
 
-ram_addr_t qemu_balloon_status(void)
+QObject *qemu_balloon_status(void)
 {
     if (qemu_balloon_event)
         return qemu_balloon_event(qemu_balloon_event_opaque, 0);
-    return 0;
+    return NULL;
 }
 
 /***********************************************************/