diff mbox

[RFC,15/38] radix-tree: add generic lockless radix tree module

Message ID 1440375847-17603-16-git-send-email-cota@braap.org
State New
Headers show

Commit Message

Emilio Cota Aug. 24, 2015, 12:23 a.m. UTC
This will be used by atomic instruction emulation code.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 include/qemu/radix-tree.h | 29 ++++++++++++++++++
 util/Makefile.objs        |  2 +-
 util/radix-tree.c         | 75 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 include/qemu/radix-tree.h
 create mode 100644 util/radix-tree.c

Comments

Alex Bennée Sept. 10, 2015, 2:25 p.m. UTC | #1
Emilio G. Cota <cota@braap.org> writes:

> This will be used by atomic instruction emulation code.

If we are adding utility functions into the code base like this (which I
can see being useful) we should at least add some documentation with
example calling conventions to docs/

Have you any performance numbers comparing the efficiency of the radix
approach to a "dumb" implementation?

>
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  include/qemu/radix-tree.h | 29 ++++++++++++++++++
>  util/Makefile.objs        |  2 +-
>  util/radix-tree.c         | 75 +++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 105 insertions(+), 1 deletion(-)
>  create mode 100644 include/qemu/radix-tree.h
>  create mode 100644 util/radix-tree.c
>
> diff --git a/include/qemu/radix-tree.h b/include/qemu/radix-tree.h
> new file mode 100644
> index 0000000..a4e1f97
> --- /dev/null
> +++ b/include/qemu/radix-tree.h
> @@ -0,0 +1,29 @@
> +#ifndef RADIX_TREE_H
> +#define RADIX_TREE_H
> +
> +#include <stddef.h>
> +
> +typedef struct QemuRadixNode QemuRadixNode;
> +typedef struct QemuRadixTree QemuRadixTree;
> +
> +struct QemuRadixNode {
> +    void *slots[0];
> +};
> +
> +struct QemuRadixTree {
> +    QemuRadixNode *root;
> +    int radix;
> +    int max_height;
> +};
> +
> +void qemu_radix_tree_init(QemuRadixTree *tree, int bits, int radix);
> +void *qemu_radix_tree_find_alloc(QemuRadixTree *tree, unsigned long index,
> +                                 void *(*create)(unsigned long),
> +                                 void (*delete)(void *));
> +
> +static inline void *qemu_radix_tree_find(QemuRadixTree *t, unsigned long index)
> +{
> +    return qemu_radix_tree_find_alloc(t, index, NULL, NULL);
> +}
> +
> +#endif /* RADIX_TREE_H */
> diff --git a/util/Makefile.objs b/util/Makefile.objs
> index 114d657..6b18d3d 100644
> --- a/util/Makefile.objs
> +++ b/util/Makefile.objs
> @@ -1,4 +1,4 @@
> -util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
> +util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o radix-tree.o
>  util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o
>  util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o
>  util-obj-y += envlist.o path.o module.o
> diff --git a/util/radix-tree.c b/util/radix-tree.c
> new file mode 100644
> index 0000000..69eff29
> --- /dev/null
> +++ b/util/radix-tree.c
> @@ -0,0 +1,75 @@
> +/*
> + * radix-tree.c
> + * Non-blocking radix tree.
> + *
> + * Features:
> + * - Concurrent lookups and inserts.
> + * - No support for deletions.
> + *
> + * Conventions:
> + * - Height is counted starting from 0 at the bottom.
> + * - The index is used from left to right, i.e. MSBs are used first. This way
> + *   nearby addresses land in nearby slots, minimising cache/TLB misses.
> + */
> +#include <glib.h>
> +
> +#include "qemu/radix-tree.h"
> +#include "qemu/atomic.h"
> +#include "qemu/bitops.h"
> +#include "qemu/osdep.h"
> +
> +typedef struct QemuRadixNode QemuRadixNode;
> +
> +void *qemu_radix_tree_find_alloc(QemuRadixTree *tree, unsigned long index,
> +                                 void *(*create)(unsigned long),
> +                                 void (*delete)(void *))
> +{
> +    QemuRadixNode *parent;
> +    QemuRadixNode *node = tree->root;
> +    void **slot;
> +    int n_slots = BIT(tree->radix);
> +    int level = tree->max_height - 1;
> +    int shift = (level - 1) * tree->radix;
> +
> +    do {
> +        parent = node;
> +        slot = parent->slots + ((index >> shift) & (n_slots - 1));
> +        node = atomic_read(slot);
> +        smp_read_barrier_depends();
> +        if (node == NULL) {
> +            void *old;
> +            void *new;
> +
> +            if (!create) {
> +                return NULL;
> +            }
> +
> +            if (level == 1) {
> +                node = create(index);
> +            } else {
> +                node = g_malloc0(sizeof(*node) + sizeof(void *) * n_slots);
> +            }
> +            new = node;
> +            /* atomic_cmpxchg is type-safe so we cannot use 'node' here */
> +            old = atomic_cmpxchg(slot, NULL, new);
> +            if (old) {
> +                if (level == 1) {
> +                    delete(node);
> +                } else {
> +                    g_free(node);
> +                }
> +                node = old;
> +            }
> +        }
> +        shift -= tree->radix;
> +        level--;
> +    } while (level > 0);
> +    return node;
> +}
> +
> +void qemu_radix_tree_init(QemuRadixTree *tree, int bits, int radix)
> +{
> +    tree->radix = radix;
> +    tree->max_height = 1 + DIV_ROUND_UP(bits, radix);
> +    tree->root = g_malloc0(sizeof(*tree->root) + sizeof(void *) * BIT(radix));
> +}
Emilio Cota Sept. 10, 2015, 6 p.m. UTC | #2
On Thu, Sep 10, 2015 at 15:25:50 +0100, Alex Bennée wrote:
> 
> Emilio G. Cota <cota@braap.org> writes:
> 
> > This will be used by atomic instruction emulation code.
> 
> If we are adding utility functions into the code base like this (which I
> can see being useful) we should at least add some documentation with
> example calling conventions to docs/

Ack.

> Have you any performance numbers comparing the efficiency of the radix
> approach to a "dumb" implementation?

I tried a few lazy solutions (meaning having to write less code, which
isn't that dumb), such as having a g_hash_table protected by a mutex. But
this means potential contention on that mutex, so that's not a better option.

Whatever the solution might be, this is a slow path (only invoked
on atomic ops or regular stores that affect lines previously accessed
with atomic ops) so scalability is a more important goal than absolute speed.

The big perf impact comes on x86 when instrumenting stores, which is
orthogonal to this; invoking a helper on each store is a huge perf
killer for some benchmarks. It's OK for others though, but I still think
we should be able to do better.

Thanks,

		Emilio
diff mbox

Patch

diff --git a/include/qemu/radix-tree.h b/include/qemu/radix-tree.h
new file mode 100644
index 0000000..a4e1f97
--- /dev/null
+++ b/include/qemu/radix-tree.h
@@ -0,0 +1,29 @@ 
+#ifndef RADIX_TREE_H
+#define RADIX_TREE_H
+
+#include <stddef.h>
+
+typedef struct QemuRadixNode QemuRadixNode;
+typedef struct QemuRadixTree QemuRadixTree;
+
+struct QemuRadixNode {
+    void *slots[0];
+};
+
+struct QemuRadixTree {
+    QemuRadixNode *root;
+    int radix;
+    int max_height;
+};
+
+void qemu_radix_tree_init(QemuRadixTree *tree, int bits, int radix);
+void *qemu_radix_tree_find_alloc(QemuRadixTree *tree, unsigned long index,
+                                 void *(*create)(unsigned long),
+                                 void (*delete)(void *));
+
+static inline void *qemu_radix_tree_find(QemuRadixTree *t, unsigned long index)
+{
+    return qemu_radix_tree_find_alloc(t, index, NULL, NULL);
+}
+
+#endif /* RADIX_TREE_H */
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 114d657..6b18d3d 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -1,4 +1,4 @@ 
-util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
+util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o radix-tree.o
 util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o
 util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o
 util-obj-y += envlist.o path.o module.o
diff --git a/util/radix-tree.c b/util/radix-tree.c
new file mode 100644
index 0000000..69eff29
--- /dev/null
+++ b/util/radix-tree.c
@@ -0,0 +1,75 @@ 
+/*
+ * radix-tree.c
+ * Non-blocking radix tree.
+ *
+ * Features:
+ * - Concurrent lookups and inserts.
+ * - No support for deletions.
+ *
+ * Conventions:
+ * - Height is counted starting from 0 at the bottom.
+ * - The index is used from left to right, i.e. MSBs are used first. This way
+ *   nearby addresses land in nearby slots, minimising cache/TLB misses.
+ */
+#include <glib.h>
+
+#include "qemu/radix-tree.h"
+#include "qemu/atomic.h"
+#include "qemu/bitops.h"
+#include "qemu/osdep.h"
+
+typedef struct QemuRadixNode QemuRadixNode;
+
+void *qemu_radix_tree_find_alloc(QemuRadixTree *tree, unsigned long index,
+                                 void *(*create)(unsigned long),
+                                 void (*delete)(void *))
+{
+    QemuRadixNode *parent;
+    QemuRadixNode *node = tree->root;
+    void **slot;
+    int n_slots = BIT(tree->radix);
+    int level = tree->max_height - 1;
+    int shift = (level - 1) * tree->radix;
+
+    do {
+        parent = node;
+        slot = parent->slots + ((index >> shift) & (n_slots - 1));
+        node = atomic_read(slot);
+        smp_read_barrier_depends();
+        if (node == NULL) {
+            void *old;
+            void *new;
+
+            if (!create) {
+                return NULL;
+            }
+
+            if (level == 1) {
+                node = create(index);
+            } else {
+                node = g_malloc0(sizeof(*node) + sizeof(void *) * n_slots);
+            }
+            new = node;
+            /* atomic_cmpxchg is type-safe so we cannot use 'node' here */
+            old = atomic_cmpxchg(slot, NULL, new);
+            if (old) {
+                if (level == 1) {
+                    delete(node);
+                } else {
+                    g_free(node);
+                }
+                node = old;
+            }
+        }
+        shift -= tree->radix;
+        level--;
+    } while (level > 0);
+    return node;
+}
+
+void qemu_radix_tree_init(QemuRadixTree *tree, int bits, int radix)
+{
+    tree->radix = radix;
+    tree->max_height = 1 + DIV_ROUND_UP(bits, radix);
+    tree->root = g_malloc0(sizeof(*tree->root) + sizeof(void *) * BIT(radix));
+}