Message ID | 1440375847-17603-16-git-send-email-cota@braap.org |
---|---|
State | New |
Headers | show |
Emilio G. Cota <cota@braap.org> writes: > This will be used by atomic instruction emulation code. If we are adding utility functions into the code base like this (which I can see being useful) we should at least add some documentation with example calling conventions to docs/ Have you any performance numbers comparing the efficiency of the radix approach to a "dumb" implementation? > > Signed-off-by: Emilio G. Cota <cota@braap.org> > --- > include/qemu/radix-tree.h | 29 ++++++++++++++++++ > util/Makefile.objs | 2 +- > util/radix-tree.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 105 insertions(+), 1 deletion(-) > create mode 100644 include/qemu/radix-tree.h > create mode 100644 util/radix-tree.c > > diff --git a/include/qemu/radix-tree.h b/include/qemu/radix-tree.h > new file mode 100644 > index 0000000..a4e1f97 > --- /dev/null > +++ b/include/qemu/radix-tree.h > @@ -0,0 +1,29 @@ > +#ifndef RADIX_TREE_H > +#define RADIX_TREE_H > + > +#include <stddef.h> > + > +typedef struct QemuRadixNode QemuRadixNode; > +typedef struct QemuRadixTree QemuRadixTree; > + > +struct QemuRadixNode { > + void *slots[0]; > +}; > + > +struct QemuRadixTree { > + QemuRadixNode *root; > + int radix; > + int max_height; > +}; > + > +void qemu_radix_tree_init(QemuRadixTree *tree, int bits, int radix); > +void *qemu_radix_tree_find_alloc(QemuRadixTree *tree, unsigned long index, > + void *(*create)(unsigned long), > + void (*delete)(void *)); > + > +static inline void *qemu_radix_tree_find(QemuRadixTree *t, unsigned long index) > +{ > + return qemu_radix_tree_find_alloc(t, index, NULL, NULL); > +} > + > +#endif /* RADIX_TREE_H */ > diff --git a/util/Makefile.objs b/util/Makefile.objs > index 114d657..6b18d3d 100644 > --- a/util/Makefile.objs > +++ b/util/Makefile.objs > @@ -1,4 +1,4 @@ > -util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o > +util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o radix-tree.o > util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o > util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o > util-obj-y += envlist.o path.o module.o > diff --git a/util/radix-tree.c b/util/radix-tree.c > new file mode 100644 > index 0000000..69eff29 > --- /dev/null > +++ b/util/radix-tree.c > @@ -0,0 +1,75 @@ > +/* > + * radix-tree.c > + * Non-blocking radix tree. > + * > + * Features: > + * - Concurrent lookups and inserts. > + * - No support for deletions. > + * > + * Conventions: > + * - Height is counted starting from 0 at the bottom. > + * - The index is used from left to right, i.e. MSBs are used first. This way > + * nearby addresses land in nearby slots, minimising cache/TLB misses. > + */ > +#include <glib.h> > + > +#include "qemu/radix-tree.h" > +#include "qemu/atomic.h" > +#include "qemu/bitops.h" > +#include "qemu/osdep.h" > + > +typedef struct QemuRadixNode QemuRadixNode; > + > +void *qemu_radix_tree_find_alloc(QemuRadixTree *tree, unsigned long index, > + void *(*create)(unsigned long), > + void (*delete)(void *)) > +{ > + QemuRadixNode *parent; > + QemuRadixNode *node = tree->root; > + void **slot; > + int n_slots = BIT(tree->radix); > + int level = tree->max_height - 1; > + int shift = (level - 1) * tree->radix; > + > + do { > + parent = node; > + slot = parent->slots + ((index >> shift) & (n_slots - 1)); > + node = atomic_read(slot); > + smp_read_barrier_depends(); > + if (node == NULL) { > + void *old; > + void *new; > + > + if (!create) { > + return NULL; > + } > + > + if (level == 1) { > + node = create(index); > + } else { > + node = g_malloc0(sizeof(*node) + sizeof(void *) * n_slots); > + } > + new = node; > + /* atomic_cmpxchg is type-safe so we cannot use 'node' here */ > + old = atomic_cmpxchg(slot, NULL, new); > + if (old) { > + if (level == 1) { > + delete(node); > + } else { > + g_free(node); > + } > + node = old; > + } > + } > + shift -= tree->radix; > + level--; > + } while (level > 0); > + return node; > +} > + > +void qemu_radix_tree_init(QemuRadixTree *tree, int bits, int radix) > +{ > + tree->radix = radix; > + tree->max_height = 1 + DIV_ROUND_UP(bits, radix); > + tree->root = g_malloc0(sizeof(*tree->root) + sizeof(void *) * BIT(radix)); > +}
On Thu, Sep 10, 2015 at 15:25:50 +0100, Alex Bennée wrote: > > Emilio G. Cota <cota@braap.org> writes: > > > This will be used by atomic instruction emulation code. > > If we are adding utility functions into the code base like this (which I > can see being useful) we should at least add some documentation with > example calling conventions to docs/ Ack. > Have you any performance numbers comparing the efficiency of the radix > approach to a "dumb" implementation? I tried a few lazy solutions (meaning having to write less code, which isn't that dumb), such as having a g_hash_table protected by a mutex. But this means potential contention on that mutex, so that's not a better option. Whatever the solution might be, this is a slow path (only invoked on atomic ops or regular stores that affect lines previously accessed with atomic ops) so scalability is a more important goal than absolute speed. The big perf impact comes on x86 when instrumenting stores, which is orthogonal to this; invoking a helper on each store is a huge perf killer for some benchmarks. It's OK for others though, but I still think we should be able to do better. Thanks, Emilio
diff --git a/include/qemu/radix-tree.h b/include/qemu/radix-tree.h new file mode 100644 index 0000000..a4e1f97 --- /dev/null +++ b/include/qemu/radix-tree.h @@ -0,0 +1,29 @@ +#ifndef RADIX_TREE_H +#define RADIX_TREE_H + +#include <stddef.h> + +typedef struct QemuRadixNode QemuRadixNode; +typedef struct QemuRadixTree QemuRadixTree; + +struct QemuRadixNode { + void *slots[0]; +}; + +struct QemuRadixTree { + QemuRadixNode *root; + int radix; + int max_height; +}; + +void qemu_radix_tree_init(QemuRadixTree *tree, int bits, int radix); +void *qemu_radix_tree_find_alloc(QemuRadixTree *tree, unsigned long index, + void *(*create)(unsigned long), + void (*delete)(void *)); + +static inline void *qemu_radix_tree_find(QemuRadixTree *t, unsigned long index) +{ + return qemu_radix_tree_find_alloc(t, index, NULL, NULL); +} + +#endif /* RADIX_TREE_H */ diff --git a/util/Makefile.objs b/util/Makefile.objs index 114d657..6b18d3d 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -1,4 +1,4 @@ -util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o +util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o radix-tree.o util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o util-obj-y += envlist.o path.o module.o diff --git a/util/radix-tree.c b/util/radix-tree.c new file mode 100644 index 0000000..69eff29 --- /dev/null +++ b/util/radix-tree.c @@ -0,0 +1,75 @@ +/* + * radix-tree.c + * Non-blocking radix tree. + * + * Features: + * - Concurrent lookups and inserts. + * - No support for deletions. + * + * Conventions: + * - Height is counted starting from 0 at the bottom. + * - The index is used from left to right, i.e. MSBs are used first. This way + * nearby addresses land in nearby slots, minimising cache/TLB misses. + */ +#include <glib.h> + +#include "qemu/radix-tree.h" +#include "qemu/atomic.h" +#include "qemu/bitops.h" +#include "qemu/osdep.h" + +typedef struct QemuRadixNode QemuRadixNode; + +void *qemu_radix_tree_find_alloc(QemuRadixTree *tree, unsigned long index, + void *(*create)(unsigned long), + void (*delete)(void *)) +{ + QemuRadixNode *parent; + QemuRadixNode *node = tree->root; + void **slot; + int n_slots = BIT(tree->radix); + int level = tree->max_height - 1; + int shift = (level - 1) * tree->radix; + + do { + parent = node; + slot = parent->slots + ((index >> shift) & (n_slots - 1)); + node = atomic_read(slot); + smp_read_barrier_depends(); + if (node == NULL) { + void *old; + void *new; + + if (!create) { + return NULL; + } + + if (level == 1) { + node = create(index); + } else { + node = g_malloc0(sizeof(*node) + sizeof(void *) * n_slots); + } + new = node; + /* atomic_cmpxchg is type-safe so we cannot use 'node' here */ + old = atomic_cmpxchg(slot, NULL, new); + if (old) { + if (level == 1) { + delete(node); + } else { + g_free(node); + } + node = old; + } + } + shift -= tree->radix; + level--; + } while (level > 0); + return node; +} + +void qemu_radix_tree_init(QemuRadixTree *tree, int bits, int radix) +{ + tree->radix = radix; + tree->max_height = 1 + DIV_ROUND_UP(bits, radix); + tree->root = g_malloc0(sizeof(*tree->root) + sizeof(void *) * BIT(radix)); +}
This will be used by atomic instruction emulation code. Signed-off-by: Emilio G. Cota <cota@braap.org> --- include/qemu/radix-tree.h | 29 ++++++++++++++++++ util/Makefile.objs | 2 +- util/radix-tree.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 include/qemu/radix-tree.h create mode 100644 util/radix-tree.c