Patchwork [PULL,13/28] exec: memory radix tree page level compression

login
register
mail settings
Submitter Michael S. Tsirkin
Date Dec. 11, 2013, 6:30 p.m.
Message ID <1386786509-29966-13-git-send-email-mst@redhat.com>
Download mbox | patch
Permalink /patch/300319/
State New
Headers show

Comments

Michael S. Tsirkin - Dec. 11, 2013, 6:30 p.m.
At the moment, memory radix tree is already variable width, but it can
only skip the low bits of address.

This is efficient if we have huge memory regions but inefficient if we
are only using a tiny portion of the address space.

After we have built up the map, detect
configurations where a single L2 entry is valid.

We then speed up the lookup by skipping one or more levels.
In case any levels were skipped, we might end up in a valid section
instead of erroring out. We handle this by checking that
the address is in range of the resulting section.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 exec.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

Patch

diff --git a/exec.c b/exec.c
index b528dad..7e5ce93 100644
--- a/exec.c
+++ b/exec.c
@@ -51,6 +51,8 @@ 
 
 #include "exec/memory-internal.h"
 
+#include "qemu/range.h"
+
 //#define DEBUG_SUBPAGE
 
 #if !defined(CONFIG_USER_ONLY)
@@ -216,6 +218,68 @@  static void phys_page_set(AddressSpaceDispatch *d,
     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 }
 
+/* Compact a non leaf page entry. Simply detect that the entry has a single child,
+ * and update our entry so we can skip it and go directly to the destination.
+ */
+static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
+{
+    unsigned valid_ptr = P_L2_SIZE;
+    int valid = 0;
+    PhysPageEntry *p;
+    int i;
+
+    if (lp->ptr == PHYS_MAP_NODE_NIL) {
+        return;
+    }
+
+    p = nodes[lp->ptr];
+    for (i = 0; i < P_L2_SIZE; i++) {
+        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
+            continue;
+        }
+
+        valid_ptr = i;
+        valid++;
+        if (p[i].skip) {
+            phys_page_compact(&p[i], nodes, compacted);
+        }
+    }
+
+    /* We can only compress if there's only one child. */
+    if (valid != 1) {
+        return;
+    }
+
+    assert(valid_ptr < P_L2_SIZE);
+
+    /* Don't compress if it won't fit in the # of bits we have. */
+    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
+        return;
+    }
+
+    lp->ptr = p[valid_ptr].ptr;
+    if (!p[valid_ptr].skip) {
+        /* If our only child is a leaf, make this a leaf. */
+        /* By design, we should have made this node a leaf to begin with so we
+         * should never reach here.
+         * But since it's so simple to handle this, let's do it just in case we
+         * change this rule.
+         */
+        lp->skip = 0;
+    } else {
+        lp->skip += p[valid_ptr].skip;
+    }
+}
+
+static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
+{
+    DECLARE_BITMAP(compacted, nodes_nb);
+
+    if (d->phys_map.skip) {
+        phys_page_compact(&d->phys_map, d->nodes, compacted);
+    }
+}
+
 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                            Node *nodes, MemoryRegionSection *sections)
 {
@@ -230,7 +294,14 @@  static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
         p = nodes[lp.ptr];
         lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
     }
-    return &sections[lp.ptr];
+
+    if (sections[lp.ptr].size.hi ||
+        range_covers_byte(sections[lp.ptr].offset_within_address_space,
+                          sections[lp.ptr].size.lo, addr)) {
+        return &sections[lp.ptr];
+    } else {
+        return &sections[PHYS_SECTION_UNASSIGNED];
+    }
 }
 
 bool memory_region_is_unassigned(MemoryRegion *mr)
@@ -1696,6 +1767,8 @@  static void mem_commit(MemoryListener *listener)
     next->nodes = next_map.nodes;
     next->sections = next_map.sections;
 
+    phys_page_compact_all(next, next_map.nodes_nb);
+
     as->dispatch = next;
     g_free(cur);
 }