Message ID | 1355144985-12897-4-git-send-email-stefanha@redhat.com |
---|---|
State | New |
Headers | show |
On Mon, Dec 10, 2012 at 02:09:36PM +0100, Stefan Hajnoczi wrote: > The data plane thread needs to map guest physical addresses to host > pointers. Normally this is done with cpu_physical_memory_map() but the > function assumes the global mutex is held. The data plane thread does > not touch the global mutex and therefore needs a thread-safe memory > mapping mechanism. > > Hostmem registers a MemoryListener similar to how vhost collects and > pushes memory region information into the kernel. There is a > fine-grained lock on the regions list which is held during lookup and > when installing a new regions list. Can we export and reuse the vhost code for this? I think you will find this advantageous when you add migration support down the line. And if you find it necessary to use MemoryListener e.g. for performance reasons, then vhost will likely benefit too. > When the physical memory map changes the MemoryListener callbacks are > invoked. They build up a new list of memory regions which is finally > installed when the list has been completed. 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > --- > hw/Makefile.objs | 2 +- > hw/dataplane/Makefile.objs | 3 + > hw/dataplane/hostmem.c | 176 +++++++++++++++++++++++++++++++++++++++++++++ > hw/dataplane/hostmem.h | 57 +++++++++++++++ > 4 files changed, 237 insertions(+), 1 deletion(-) > create mode 100644 hw/dataplane/Makefile.objs > create mode 100644 hw/dataplane/hostmem.c > create mode 100644 hw/dataplane/hostmem.h > > diff --git a/hw/Makefile.objs b/hw/Makefile.objs > index d581d8d..cec84bc 100644 > --- a/hw/Makefile.objs > +++ b/hw/Makefile.objs > @@ -1,4 +1,4 @@ > -common-obj-y = usb/ ide/ > +common-obj-y = usb/ ide/ dataplane/ > common-obj-y += loader.o > common-obj-$(CONFIG_VIRTIO) += virtio-console.o > common-obj-$(CONFIG_VIRTIO) += virtio-rng.o > diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs > new file mode 100644 > index 0000000..8c8dea1 > --- /dev/null > +++ b/hw/dataplane/Makefile.objs > @@ -0,0 +1,3 @@ > +ifeq ($(CONFIG_VIRTIO), y) > +common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o > +endif > diff --git a/hw/dataplane/hostmem.c b/hw/dataplane/hostmem.c > new file mode 100644 > index 0000000..d5e1070 > --- /dev/null > +++ b/hw/dataplane/hostmem.c > @@ -0,0 +1,176 @@ > +/* > + * Thread-safe guest to host memory mapping > + * > + * Copyright 2012 Red Hat, Inc. and/or its affiliates > + * > + * Authors: > + * Stefan Hajnoczi <stefanha@redhat.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. 
> + * > + */ > + > +#include "exec-memory.h" > +#include "hostmem.h" > + > +static int hostmem_lookup_cmp(const void *phys_, const void *region_) > +{ > + hwaddr phys = *(const hwaddr *)phys_; > + const HostmemRegion *region = region_; > + > + if (phys < region->guest_addr) { > + return -1; > + } else if (phys >= region->guest_addr + region->size) { > + return 1; > + } else { > + return 0; > + } > +} > + > +/** > + * Map guest physical address to host pointer > + */ > +void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write) > +{ > + HostmemRegion *region; > + void *host_addr = NULL; > + hwaddr offset_within_region; > + > + qemu_mutex_lock(&hostmem->current_regions_lock); > + region = bsearch(&phys, hostmem->current_regions, > + hostmem->num_current_regions, > + sizeof(hostmem->current_regions[0]), > + hostmem_lookup_cmp); > + if (!region) { > + goto out; > + } > + if (is_write && region->readonly) { > + goto out; > + } > + offset_within_region = phys - region->guest_addr; > + if (offset_within_region + len <= region->size) { > + host_addr = region->host_addr + offset_within_region; > + } > +out: > + qemu_mutex_unlock(&hostmem->current_regions_lock); > + > + return host_addr; > +} > + > +/** > + * Install new regions list > + */ > +static void hostmem_listener_commit(MemoryListener *listener) > +{ > + Hostmem *hostmem = container_of(listener, Hostmem, listener); > + > + qemu_mutex_lock(&hostmem->current_regions_lock); > + g_free(hostmem->current_regions); > + hostmem->current_regions = hostmem->new_regions; > + hostmem->num_current_regions = hostmem->num_new_regions; > + qemu_mutex_unlock(&hostmem->current_regions_lock); > + > + /* Reset new regions list */ > + hostmem->new_regions = NULL; > + hostmem->num_new_regions = 0; > +} > + > +/** > + * Add a MemoryRegionSection to the new regions list > + */ > +static void hostmem_append_new_region(Hostmem *hostmem, > + MemoryRegionSection *section) > +{ > + void *ram_ptr = 
memory_region_get_ram_ptr(section->mr); > + size_t num = hostmem->num_new_regions; > + size_t new_size = (num + 1) * sizeof(hostmem->new_regions[0]); > + > + hostmem->new_regions = g_realloc(hostmem->new_regions, new_size); > + hostmem->new_regions[num] = (HostmemRegion){ > + .host_addr = ram_ptr + section->offset_within_region, > + .guest_addr = section->offset_within_address_space, > + .size = section->size, > + .readonly = section->readonly, > + }; > + hostmem->num_new_regions++; > +} > + > +static void hostmem_listener_append_region(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + Hostmem *hostmem = container_of(listener, Hostmem, listener); > + > + /* Ignore non-RAM regions, we may not be able to map them */ > + if (!memory_region_is_ram(section->mr)) { > + return; > + } > + > + /* Ignore regions with dirty logging, we cannot mark them dirty */ > + if (memory_region_is_logging(section->mr)) { > + return; > + } > + > + hostmem_append_new_region(hostmem, section); > +} > + > +/* We don't implement most MemoryListener callbacks, use these nop stubs */ > +static void hostmem_listener_dummy(MemoryListener *listener) > +{ > +} > + > +static void hostmem_listener_section_dummy(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > +} > + > +static void hostmem_listener_eventfd_dummy(MemoryListener *listener, > + MemoryRegionSection *section, > + bool match_data, uint64_t data, > + EventNotifier *e) > +{ > +} > + > +static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener, > + MemoryRegionSection *section, > + hwaddr addr, hwaddr len) > +{ > +} > + > +void hostmem_init(Hostmem *hostmem) > +{ > + memset(hostmem, 0, sizeof(*hostmem)); > + > + qemu_mutex_init(&hostmem->current_regions_lock); > + > + hostmem->listener = (MemoryListener){ > + .begin = hostmem_listener_dummy, > + .commit = hostmem_listener_commit, > + .region_add = hostmem_listener_append_region, > + .region_del = hostmem_listener_section_dummy, > + 
.region_nop = hostmem_listener_append_region, > + .log_start = hostmem_listener_section_dummy, > + .log_stop = hostmem_listener_section_dummy, > + .log_sync = hostmem_listener_section_dummy, > + .log_global_start = hostmem_listener_dummy, > + .log_global_stop = hostmem_listener_dummy, > + .eventfd_add = hostmem_listener_eventfd_dummy, > + .eventfd_del = hostmem_listener_eventfd_dummy, > + .coalesced_mmio_add = hostmem_listener_coalesced_mmio_dummy, > + .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy, > + .priority = 10, > + }; > + > + memory_listener_register(&hostmem->listener, &address_space_memory); > + if (hostmem->num_new_regions > 0) { > + hostmem_listener_commit(&hostmem->listener); > + } > +} > + > +void hostmem_finalize(Hostmem *hostmem) > +{ > + memory_listener_unregister(&hostmem->listener); > + g_free(hostmem->new_regions); > + g_free(hostmem->current_regions); > + qemu_mutex_destroy(&hostmem->current_regions_lock); > +} > diff --git a/hw/dataplane/hostmem.h b/hw/dataplane/hostmem.h > new file mode 100644 > index 0000000..6d87841 > --- /dev/null > +++ b/hw/dataplane/hostmem.h > @@ -0,0 +1,57 @@ > +/* > + * Thread-safe guest to host memory mapping > + * > + * Copyright 2012 Red Hat, Inc. and/or its affiliates > + * > + * Authors: > + * Stefan Hajnoczi <stefanha@redhat.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ > + > +#ifndef HOSTMEM_H > +#define HOSTMEM_H > + > +#include "memory.h" > +#include "qemu-thread.h" > + > +typedef struct { > + void *host_addr; > + hwaddr guest_addr; > + uint64_t size; > + bool readonly; > +} HostmemRegion; > + > +typedef struct { > + /* The listener is invoked when regions change and a new list of regions is > + * built up completely before they are installed. 
> + */ > + MemoryListener listener; > + HostmemRegion *new_regions; > + size_t num_new_regions; > + > + /* Current regions are accessed from multiple threads either to lookup > + * addresses or to install a new list of regions. The lock protects the > + * pointer and the regions. > + */ > + QemuMutex current_regions_lock; > + HostmemRegion *current_regions; > + size_t num_current_regions; > +} Hostmem; > + > +void hostmem_init(Hostmem *hostmem); > +void hostmem_finalize(Hostmem *hostmem); > + > +/** > + * Map a guest physical address to a pointer > + * > + * Note that there is no map/unmap mechanism here. The caller must ensure that > + * mapped memory is no longer used across events like hot memory unplug. This > + * can be done with other mechanisms like bdrv_drain_all() that quiesce > + * in-flight I/O. > + */ > +void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write); > + > +#endif /* HOSTMEM_H */ > -- > 1.8.0.1
On Tue, Dec 11, 2012 at 3:13 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > On Mon, Dec 10, 2012 at 02:09:36PM +0100, Stefan Hajnoczi wrote: >> The data plane thread needs to map guest physical addresses to host >> pointers. Normally this is done with cpu_physical_memory_map() but the >> function assumes the global mutex is held. The data plane thread does >> not touch the global mutex and therefore needs a thread-safe memory >> mapping mechanism. >> >> Hostmem registers a MemoryListener similar to how vhost collects and >> pushes memory region information into the kernel. There is a >> fine-grained lock on the regions list which is held during lookup and >> when installing a new regions list. > > Can we export and reuse the vhost code for this? > I think you will find this advantageous when you add migration > support down the line. > And if you find it necessary to use MemoryListener e.g. for performance > reasons, then vhost will likely benefit too. It's technically possible and not hard to do but it prevents integrating deeper with core QEMU as the memory API becomes thread-safe. There are two ways to implement dirty logging: 1. The vhost log approach which syncs dirty information periodically. 2. A cheap thread-safe way to mark dirty outside the global mutex, i.e. a thread-safe memory_region_set_dirty(). If we can get thread-safe guest memory load/store in QEMU then #2 is included. We can switch to using hw/virtio.c instead of hw/dataplane/vring.c, we get dirty logging for free, we can drop hostmem.c completely, etc. Stefan
On Tue, Dec 11, 2012 at 04:27:49PM +0100, Stefan Hajnoczi wrote: > On Tue, Dec 11, 2012 at 3:13 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > > On Mon, Dec 10, 2012 at 02:09:36PM +0100, Stefan Hajnoczi wrote: > >> The data plane thread needs to map guest physical addresses to host > >> pointers. Normally this is done with cpu_physical_memory_map() but the > >> function assumes the global mutex is held. The data plane thread does > >> not touch the global mutex and therefore needs a thread-safe memory > >> mapping mechanism. > >> > >> Hostmem registers a MemoryListener similar to how vhost collects and > >> pushes memory region information into the kernel. There is a > >> fine-grained lock on the regions list which is held during lookup and > >> when installing a new regions list. > > > > Can we export and reuse the vhost code for this? > > I think you will find this advantageous when you add migration > > support down the line. > > And if you find it necessary to use MemoryListener e.g. for performance > > reasons, then vhost will likely benefit too. > > It's technically possible and not hard to do but it prevents > integrating deeper with core QEMU as the memory API becomes > thread-safe. > > There are two ways to implement dirty logging: > 1. The vhost log approach which syncs dirty information periodically. > 2. A cheap thread-safe way to mark dirty outside the global mutex, > i.e. a thread-safe memory_region_set_dirty(). You don't normally want to dirty the whole region, you want to do this to individual pages. > If we can get thread-safe guest memory load/store in QEMU then #2 is > included. We can switch to using hw/virtio.c instead of > hw/dataplane/vring.c, we get dirty logging for free, we can drop > hostmem.c completely, etc. > > Stefan So why not reuse existing code? If you drop it later it won't matter what you used ...
"Michael S. Tsirkin" <mst@redhat.com> writes: > On Tue, Dec 11, 2012 at 04:27:49PM +0100, Stefan Hajnoczi wrote: >> On Tue, Dec 11, 2012 at 3:13 PM, Michael S. Tsirkin <mst@redhat.com> wrote: >> > On Mon, Dec 10, 2012 at 02:09:36PM +0100, Stefan Hajnoczi wrote: >> >> The data plane thread needs to map guest physical addresses to host >> >> pointers. Normally this is done with cpu_physical_memory_map() but the >> >> function assumes the global mutex is held. The data plane thread does >> >> not touch the global mutex and therefore needs a thread-safe memory >> >> mapping mechanism. >> >> >> >> Hostmem registers a MemoryListener similar to how vhost collects and >> >> pushes memory region information into the kernel. There is a >> >> fine-grained lock on the regions list which is held during lookup and >> >> when installing a new regions list. >> > >> > Can we export and reuse the vhost code for this? >> > I think you will find this advantageous when you add migration >> > support down the line. >> > And if you find it necessary to use MemoryListener e.g. for performance >> > reasons, then vhost will likely benefit too. >> >> It's technically possible and not hard to do but it prevents >> integrating deeper with core QEMU as the memory API becomes >> thread-safe. >> >> There are two ways to implement dirty logging: >> 1. The vhost log approach which syncs dirty information periodically. >> 2. A cheap thread-safe way to mark dirty outside the global mutex, >> i.e. a thread-safe memory_region_set_dirty(). > > You don't normally want to dirty the whole region, > you want to do this to individual pages. > >> If we can get thread-safe guest memory load/store in QEMU then #2 is >> included. We can switch to using hw/virtio.c instead of >> hw/dataplane/vring.c, we get dirty logging for free, we can drop >> hostmem.c completely, etc. >> >> Stefan > > So why not reuse existing code? If you drop it later it won't > matter what you used ... 
Let's not lose sight of the forest for the trees here... This whole series is not reusing existing code. That's really the whole point. The point is to take the code (duplication and all) and then do all of the refactoring to use common code in the tree itself. If we want to put this in a hw/staging/ directory, that's fine by me too. Regards, Anthony Liguori > > -- > MST
On Tue, Dec 11, 2012 at 10:32:28AM -0600, Anthony Liguori wrote: > "Michael S. Tsirkin" <mst@redhat.com> writes: > > > On Tue, Dec 11, 2012 at 04:27:49PM +0100, Stefan Hajnoczi wrote: > >> On Tue, Dec 11, 2012 at 3:13 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > >> > On Mon, Dec 10, 2012 at 02:09:36PM +0100, Stefan Hajnoczi wrote: > >> >> The data plane thread needs to map guest physical addresses to host > >> >> pointers. Normally this is done with cpu_physical_memory_map() but the > >> >> function assumes the global mutex is held. The data plane thread does > >> >> not touch the global mutex and therefore needs a thread-safe memory > >> >> mapping mechanism. > >> >> > >> >> Hostmem registers a MemoryListener similar to how vhost collects and > >> >> pushes memory region information into the kernel. There is a > >> >> fine-grained lock on the regions list which is held during lookup and > >> >> when installing a new regions list. > >> > > >> > Can we export and reuse the vhost code for this? > >> > I think you will find this advantageous when you add migration > >> > support down the line. > >> > And if you find it necessary to use MemoryListener e.g. for performance > >> > reasons, then vhost will likely benefit too. > >> > >> It's technically possible and not hard to do but it prevents > >> integrating deeper with core QEMU as the memory API becomes > >> thread-safe. > >> > >> There are two ways to implement dirty logging: > >> 1. The vhost log approach which syncs dirty information periodically. > >> 2. A cheap thread-safe way to mark dirty outside the global mutex, > >> i.e. a thread-safe memory_region_set_dirty(). > > > > You don't normally want to dirty the whole region, > > you want to do this to individual pages. > > > >> If we can get thread-safe guest memory load/store in QEMU then #2 is > >> included. We can switch to using hw/virtio.c instead of > >> hw/dataplane/vring.c, we get dirty logging for free, we can drop > >> hostmem.c completely, etc. 
> >> > >> Stefan > > > > So why not reuse existing code? If you drop it later it won't > > > matter what you used ... > > > > Let's not lose sight of the forest for the trees here... > > > > This whole series is not reusing existing code. That's really the whole > > point. > > > > The point is to take the code (duplication and all) and then do all of > > the refactoring to use common code in the tree itself. > > > > If we want to put this in a hw/staging/ directory, that's fine by me > > too. > > > > Regards, > > > > Anthony Liguori Yes I agree. I think lack of handling for cross region descriptors bothers me a bit more. > > > > -- > > MST
On Tue, Dec 11, 2012 at 08:09:56PM +0200, Michael S. Tsirkin wrote: > On Tue, Dec 11, 2012 at 10:32:28AM -0600, Anthony Liguori wrote: > > "Michael S. Tsirkin" <mst@redhat.com> writes: > > > > > On Tue, Dec 11, 2012 at 04:27:49PM +0100, Stefan Hajnoczi wrote: > > >> On Tue, Dec 11, 2012 at 3:13 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > > >> > On Mon, Dec 10, 2012 at 02:09:36PM +0100, Stefan Hajnoczi wrote: > > >> >> The data plane thread needs to map guest physical addresses to host > > >> >> pointers. Normally this is done with cpu_physical_memory_map() but the > > >> >> function assumes the global mutex is held. The data plane thread does > > >> >> not touch the global mutex and therefore needs a thread-safe memory > > >> >> mapping mechanism. > > >> >> > > >> >> Hostmem registers a MemoryListener similar to how vhost collects and > > >> >> pushes memory region information into the kernel. There is a > > >> >> fine-grained lock on the regions list which is held during lookup and > > >> >> when installing a new regions list. > > >> > > > >> > Can we export and reuse the vhost code for this? > > >> > I think you will find this advantageous when you add migration > > >> > support down the line. > > >> > And if you find it necessary to use MemoryListener e.g. for performance > > >> > reasons, then vhost will likely benefit too. > > >> > > >> It's technically possible and not hard to do but it prevents > > >> integrating deeper with core QEMU as the memory API becomes > > >> thread-safe. > > >> > > >> There are two ways to implement dirty logging: > > >> 1. The vhost log approach which syncs dirty information periodically. > > >> 2. A cheap thread-safe way to mark dirty outside the global mutex, > > >> i.e. a thread-safe memory_region_set_dirty(). > > > > > > You don't normally want to dirty the whole region, > > > you want to do this to individual pages. > > > > > >> If we can get thread-safe guest memory load/store in QEMU then #2 is > > >> included. 
We can switch to using hw/virtio.c instead of > > >> hw/dataplane/vring.c, we get dirty logging for free, we can drop > > >> hostmem.c completely, etc. > > >> > > >> Stefan > > > > > > So why not reuse existing code? If you drop it later it won't > > > matter what you used ... > > > > Let's not lose sight of the forest for the trees here... > > > > This whole series is not reusing existing code. That's really the whole > > point. > > > > The point is to take the code (duplication and all) and then do all of > > the refactoring to use common code in the tree itself. > > > > If we want to put this in a hw/staging/ directory, that's fine by me > > too. > > > > Regards, > > > > Anthony Liguori > > Yes I agree. I think lack of handling for cross regin descriptors > bothers me a bit more. The two things you've mentioned both aren't handled by hw/virtio.c: 1. Issue: Indirect descriptors have no alignment restrictions and can cross regions. hw/virtio.c uses vring_desc_flags() and other accessor functions, which do lduw_phys() - there is no memory region boundary checking here. 2. Issue: Virtio buffers can cross memory region boundaries. hw/virtio.c maps buffers 1:1 using virtqueue_map_sg() and exits if mapping fails. It does not split buffers if they cross a memory region. These are definitely ugly corner cases but hw/virtio.c is proof that we're not hitting them in practice. Stefan
On Wed, Dec 12, 2012 at 04:34:21PM +0100, Stefan Hajnoczi wrote: > On Tue, Dec 11, 2012 at 08:09:56PM +0200, Michael S. Tsirkin wrote: > > On Tue, Dec 11, 2012 at 10:32:28AM -0600, Anthony Liguori wrote: > > > "Michael S. Tsirkin" <mst@redhat.com> writes: > > > > > > > On Tue, Dec 11, 2012 at 04:27:49PM +0100, Stefan Hajnoczi wrote: > > > >> On Tue, Dec 11, 2012 at 3:13 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > > > >> > On Mon, Dec 10, 2012 at 02:09:36PM +0100, Stefan Hajnoczi wrote: > > > >> >> The data plane thread needs to map guest physical addresses to host > > > >> >> pointers. Normally this is done with cpu_physical_memory_map() but the > > > >> >> function assumes the global mutex is held. The data plane thread does > > > >> >> not touch the global mutex and therefore needs a thread-safe memory > > > >> >> mapping mechanism. > > > >> >> > > > >> >> Hostmem registers a MemoryListener similar to how vhost collects and > > > >> >> pushes memory region information into the kernel. There is a > > > >> >> fine-grained lock on the regions list which is held during lookup and > > > >> >> when installing a new regions list. > > > >> > > > > >> > Can we export and reuse the vhost code for this? > > > >> > I think you will find this advantageous when you add migration > > > >> > support down the line. > > > >> > And if you find it necessary to use MemoryListener e.g. for performance > > > >> > reasons, then vhost will likely benefit too. > > > >> > > > >> It's technically possible and not hard to do but it prevents > > > >> integrating deeper with core QEMU as the memory API becomes > > > >> thread-safe. > > > >> > > > >> There are two ways to implement dirty logging: > > > >> 1. The vhost log approach which syncs dirty information periodically. > > > >> 2. A cheap thread-safe way to mark dirty outside the global mutex, > > > >> i.e. a thread-safe memory_region_set_dirty(). 
> > > > > > > > You don't normally want to dirty the whole region, > > > > you want to do this to individual pages. > > > > > > > >> If we can get thread-safe guest memory load/store in QEMU then #2 is > > > >> included. We can switch to using hw/virtio.c instead of > > > >> hw/dataplane/vring.c, we get dirty logging for free, we can drop > > > >> hostmem.c completely, etc. > > > >> > > > >> Stefan > > > > > > > > So why not reuse existing code? If you drop it later it won't > > > > matter what you used ... > > > > > > Let's not lose sight of the forest for the trees here... > > > > > > This whole series is not reusing existing code. That's really the whole > > > point. > > > > > > The point is to take the code (duplication and all) and then do all of > > > the refactoring to use common code in the tree itself. > > > > > > If we want to put this in a hw/staging/ directory, that's fine by me > > > too. > > > > > > Regards, > > > > > > Anthony Liguori > > > > Yes I agree. I think lack of handling for cross regin descriptors > > bothers me a bit more. > > The two things you've mentioned both aren't handled by hw/virtio.c: > > 1. Issue: Indirect descriptors have no alignment restrictions and can > cross regions. > > hw/virtio.c uses vring_desc_flags() and other accessor functions, > which do lduw_phys() - there is no memory region boundary checking > here. Since addresses are aligned this one is fine I think. > 2. Issue: Virtio buffers can cross memory region boundaries. > > hw/virtio.c maps buffers 1:1 using virtqueue_map_sg() and exits if > mapping fails. It does not split buffers if they cross a memory > region. > > These are definitely ugly corner cases but hw/virtio.c is proof that > we're not hitting them in practice. > > Stefan Yes, this one seems ugly. Maybe add a TODO? OK let's assume we want to put it in staging/ I worry about the virtio-blk changes being isolated. 
Can you put ifdef CONFIG_VIRTIO_BLK_DATA_PLANE around them all to avoid dependency on that header completely if configured out?
On Wed, Dec 12, 2012 at 4:49 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > On Wed, Dec 12, 2012 at 04:34:21PM +0100, Stefan Hajnoczi wrote: >> On Tue, Dec 11, 2012 at 08:09:56PM +0200, Michael S. Tsirkin wrote: >> > On Tue, Dec 11, 2012 at 10:32:28AM -0600, Anthony Liguori wrote: >> > > "Michael S. Tsirkin" <mst@redhat.com> writes: >> > > >> > > > On Tue, Dec 11, 2012 at 04:27:49PM +0100, Stefan Hajnoczi wrote: >> > > >> On Tue, Dec 11, 2012 at 3:13 PM, Michael S. Tsirkin <mst@redhat.com> wrote: >> > > >> > On Mon, Dec 10, 2012 at 02:09:36PM +0100, Stefan Hajnoczi wrote: >> > > >> >> The data plane thread needs to map guest physical addresses to host >> > > >> >> pointers. Normally this is done with cpu_physical_memory_map() but the >> > > >> >> function assumes the global mutex is held. The data plane thread does >> > > >> >> not touch the global mutex and therefore needs a thread-safe memory >> > > >> >> mapping mechanism. >> > > >> >> >> > > >> >> Hostmem registers a MemoryListener similar to how vhost collects and >> > > >> >> pushes memory region information into the kernel. There is a >> > > >> >> fine-grained lock on the regions list which is held during lookup and >> > > >> >> when installing a new regions list. >> > > >> > >> > > >> > Can we export and reuse the vhost code for this? >> > > >> > I think you will find this advantageous when you add migration >> > > >> > support down the line. >> > > >> > And if you find it necessary to use MemoryListener e.g. for performance >> > > >> > reasons, then vhost will likely benefit too. >> > > >> >> > > >> It's technically possible and not hard to do but it prevents >> > > >> integrating deeper with core QEMU as the memory API becomes >> > > >> thread-safe. >> > > >> >> > > >> There are two ways to implement dirty logging: >> > > >> 1. The vhost log approach which syncs dirty information periodically. >> > > >> 2. A cheap thread-safe way to mark dirty outside the global mutex, >> > > >> i.e. 
a thread-safe memory_region_set_dirty(). >> > > > >> > > > You don't normally want to dirty the whole region, >> > > > you want to do this to individual pages. >> > > > >> > > >> If we can get thread-safe guest memory load/store in QEMU then #2 is >> > > >> included. We can switch to using hw/virtio.c instead of >> > > >> hw/dataplane/vring.c, we get dirty logging for free, we can drop >> > > >> hostmem.c completely, etc. >> > > >> >> > > >> Stefan >> > > > >> > > > So why not reuse existing code? If you drop it later it won't >> > > > matter what you used ... >> > > >> > > Let's not lose sight of the forest for the trees here... >> > > >> > > This whole series is not reusing existing code. That's really the whole >> > > point. >> > > >> > > The point is to take the code (duplication and all) and then do all of >> > > the refactoring to use common code in the tree itself. >> > > >> > > If we want to put this in a hw/staging/ directory, that's fine by me >> > > too. >> > > >> > > Regards, >> > > >> > > Anthony Liguori >> > >> > Yes I agree. I think lack of handling for cross regin descriptors >> > bothers me a bit more. >> >> The two things you've mentioned both aren't handled by hw/virtio.c: >> >> 1. Issue: Indirect descriptors have no alignment restrictions and can >> cross regions. >> >> hw/virtio.c uses vring_desc_flags() and other accessor functions, >> which do lduw_phys() - there is no memory region boundary checking >> here. > > Since addresses are aligned this one is fine I think. > >> 2. Issue: Virtio buffers can cross memory region boundaries. >> >> hw/virtio.c maps buffers 1:1 using virtqueue_map_sg() and exits if >> mapping fails. It does not split buffers if they cross a memory >> region. >> >> These are definitely ugly corner cases but hw/virtio.c is proof that >> we're not hitting them in practice. >> >> Stefan > > Yes, this one seems ugly. Maybe add a TODO? 
> > OK let's assume we want to put it in staging/ > I worry about the virtio-blk changes being isolated. > Can you put ifdef CONFIG_VIRTIO_BLK_DATA_PLANE around > them all to avoid dependency on that header completely > if configured out? Okay, I'll move the #ifdefs. I like the stubs in the header file because it reduces the amount of #ifdefs, but this is easy to change. Stefan
On Fri, Dec 14, 2012 at 12:45:16PM +0100, Stefan Hajnoczi wrote: > On Wed, Dec 12, 2012 at 4:49 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > > On Wed, Dec 12, 2012 at 04:34:21PM +0100, Stefan Hajnoczi wrote: > >> On Tue, Dec 11, 2012 at 08:09:56PM +0200, Michael S. Tsirkin wrote: > >> > On Tue, Dec 11, 2012 at 10:32:28AM -0600, Anthony Liguori wrote: > >> > > "Michael S. Tsirkin" <mst@redhat.com> writes: > >> > > > >> > > > On Tue, Dec 11, 2012 at 04:27:49PM +0100, Stefan Hajnoczi wrote: > >> > > >> On Tue, Dec 11, 2012 at 3:13 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > >> > > >> > On Mon, Dec 10, 2012 at 02:09:36PM +0100, Stefan Hajnoczi wrote: > >> > > >> >> The data plane thread needs to map guest physical addresses to host > >> > > >> >> pointers. Normally this is done with cpu_physical_memory_map() but the > >> > > >> >> function assumes the global mutex is held. The data plane thread does > >> > > >> >> not touch the global mutex and therefore needs a thread-safe memory > >> > > >> >> mapping mechanism. > >> > > >> >> > >> > > >> >> Hostmem registers a MemoryListener similar to how vhost collects and > >> > > >> >> pushes memory region information into the kernel. There is a > >> > > >> >> fine-grained lock on the regions list which is held during lookup and > >> > > >> >> when installing a new regions list. > >> > > >> > > >> > > >> > Can we export and reuse the vhost code for this? > >> > > >> > I think you will find this advantageous when you add migration > >> > > >> > support down the line. > >> > > >> > And if you find it necessary to use MemoryListener e.g. for performance > >> > > >> > reasons, then vhost will likely benefit too. > >> > > >> > >> > > >> It's technically possible and not hard to do but it prevents > >> > > >> integrating deeper with core QEMU as the memory API becomes > >> > > >> thread-safe. > >> > > >> > >> > > >> There are two ways to implement dirty logging: > >> > > >> 1. 
The vhost log approach which syncs dirty information periodically. > >> > > >> 2. A cheap thread-safe way to mark dirty outside the global mutex, > >> > > >> i.e. a thread-safe memory_region_set_dirty(). > >> > > > > >> > > > You don't normally want to dirty the whole region, > >> > > > you want to do this to individual pages. > >> > > > > >> > > >> If we can get thread-safe guest memory load/store in QEMU then #2 is > >> > > >> included. We can switch to using hw/virtio.c instead of > >> > > >> hw/dataplane/vring.c, we get dirty logging for free, we can drop > >> > > >> hostmem.c completely, etc. > >> > > >> > >> > > >> Stefan > >> > > > > >> > > > So why not reuse existing code? If you drop it later it won't > >> > > > matter what you used ... > >> > > > >> > > Let's not lose sight of the forest for the trees here... > >> > > > >> > > This whole series is not reusing existing code. That's really the whole > >> > > point. > >> > > > >> > > The point is to take the code (duplication and all) and then do all of > >> > > the refactoring to use common code in the tree itself. > >> > > > >> > > If we want to put this in a hw/staging/ directory, that's fine by me > >> > > too. > >> > > > >> > > Regards, > >> > > > >> > > Anthony Liguori > >> > > >> > Yes I agree. I think lack of handling for cross regin descriptors > >> > bothers me a bit more. > >> > >> The two things you've mentioned both aren't handled by hw/virtio.c: > >> > >> 1. Issue: Indirect descriptors have no alignment restrictions and can > >> cross regions. > >> > >> hw/virtio.c uses vring_desc_flags() and other accessor functions, > >> which do lduw_phys() - there is no memory region boundary checking > >> here. > > > > Since addresses are aligned this one is fine I think. > > > >> 2. Issue: Virtio buffers can cross memory region boundaries. > >> > >> hw/virtio.c maps buffers 1:1 using virtqueue_map_sg() and exits if > >> mapping fails. It does not split buffers if they cross a memory > >> region. 
> >> > >> These are definitely ugly corner cases but hw/virtio.c is proof that > >> we're not hitting them in practice. > >> > >> Stefan > > > > Yes, this one seems ugly. Maybe add a TODO? > > > > OK let's assume we want to put it in staging/ > > I worry about the virtio-blk changes being isolated. > > Can you put ifdef CONFIG_VIRTIO_BLK_DATA_PLANE around > > them all to avoid dependency on that header completely > > if configured out? > > Okay, I'll move the #ifdefs. I like the stubs in the header file > because it reduces the amount of #ifdefs, but this is easy to change. > > Stefan Okay. Another option (if you prefer stubs) is to add a stub for access to s->dataplane field, and surround just the field with ifdefs. As it is, this code: if (s->dataplane) { return; } can't be compiled out since compiler is not smart enough to figure out dataplane is never set.
On Sun, Dec 16, 2012 at 06:11:14PM +0200, Michael S. Tsirkin wrote: > On Fri, Dec 14, 2012 at 12:45:16PM +0100, Stefan Hajnoczi wrote: > > On Wed, Dec 12, 2012 at 4:49 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > > > On Wed, Dec 12, 2012 at 04:34:21PM +0100, Stefan Hajnoczi wrote: > > >> On Tue, Dec 11, 2012 at 08:09:56PM +0200, Michael S. Tsirkin wrote: > > >> > On Tue, Dec 11, 2012 at 10:32:28AM -0600, Anthony Liguori wrote: > > >> > > "Michael S. Tsirkin" <mst@redhat.com> writes: > > >> > > > > >> > > > On Tue, Dec 11, 2012 at 04:27:49PM +0100, Stefan Hajnoczi wrote: > > >> > > >> On Tue, Dec 11, 2012 at 3:13 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > > >> > > >> > On Mon, Dec 10, 2012 at 02:09:36PM +0100, Stefan Hajnoczi wrote: > > >> > > >> >> The data plane thread needs to map guest physical addresses to host > > >> > > >> >> pointers. Normally this is done with cpu_physical_memory_map() but the > > >> > > >> >> function assumes the global mutex is held. The data plane thread does > > >> > > >> >> not touch the global mutex and therefore needs a thread-safe memory > > >> > > >> >> mapping mechanism. > > >> > > >> >> > > >> > > >> >> Hostmem registers a MemoryListener similar to how vhost collects and > > >> > > >> >> pushes memory region information into the kernel. There is a > > >> > > >> >> fine-grained lock on the regions list which is held during lookup and > > >> > > >> >> when installing a new regions list. > > >> > > >> > > > >> > > >> > Can we export and reuse the vhost code for this? > > >> > > >> > I think you will find this advantageous when you add migration > > >> > > >> > support down the line. > > >> > > >> > And if you find it necessary to use MemoryListener e.g. for performance > > >> > > >> > reasons, then vhost will likely benefit too. > > >> > > >> > > >> > > >> It's technically possible and not hard to do but it prevents > > >> > > >> integrating deeper with core QEMU as the memory API becomes > > >> > > >> thread-safe. 
> > >> > > >> > > >> > > >> There are two ways to implement dirty logging: > > >> > > >> 1. The vhost log approach which syncs dirty information periodically. > > >> > > >> 2. A cheap thread-safe way to mark dirty outside the global mutex, > > >> > > >> i.e. a thread-safe memory_region_set_dirty(). > > >> > > > > > >> > > > You don't normally want to dirty the whole region, > > >> > > > you want to do this to individual pages. > > >> > > > > > >> > > >> If we can get thread-safe guest memory load/store in QEMU then #2 is > > >> > > >> included. We can switch to using hw/virtio.c instead of > > >> > > >> hw/dataplane/vring.c, we get dirty logging for free, we can drop > > >> > > >> hostmem.c completely, etc. > > >> > > >> > > >> > > >> Stefan > > >> > > > > > >> > > > So why not reuse existing code? If you drop it later it won't > > >> > > > matter what you used ... > > >> > > > > >> > > Let's not lose sight of the forest for the trees here... > > >> > > > > >> > > This whole series is not reusing existing code. That's really the whole > > >> > > point. > > >> > > > > >> > > The point is to take the code (duplication and all) and then do all of > > >> > > the refactoring to use common code in the tree itself. > > >> > > > > >> > > If we want to put this in a hw/staging/ directory, that's fine by me > > >> > > too. > > >> > > > > >> > > Regards, > > >> > > > > >> > > Anthony Liguori > > >> > > > >> > Yes I agree. I think lack of handling for cross region descriptors > > >> > bothers me a bit more. > > >> > > >> The two things you've mentioned both aren't handled by hw/virtio.c: > > >> > > >> 1. Issue: Indirect descriptors have no alignment restrictions and can > > >> cross regions. > > >> > > >> hw/virtio.c uses vring_desc_flags() and other accessor functions, > > >> which do lduw_phys() - there is no memory region boundary checking > > >> here. > > > > > > Since addresses are aligned this one is fine I think. > > > > > >> 2. 
Issue: Virtio buffers can cross memory region boundaries. > > >> > > >> hw/virtio.c maps buffers 1:1 using virtqueue_map_sg() and exits if > > >> mapping fails. It does not split buffers if they cross a memory > > >> region. > > >> > > >> These are definitely ugly corner cases but hw/virtio.c is proof that > > >> we're not hitting them in practice. > > >> > > >> Stefan > > > > > > Yes, this one seems ugly. Maybe add a TODO? > > > > > > OK let's assume we want to put it in staging/ > > > I worry about the virtio-blk changes being isolated. > > > Can you put ifdef CONFIG_VIRTIO_BLK_DATA_PLANE around > > > them all to avoid dependency on that header completely > > > if configured out? > > > > Okay, I'll move the #ifdefs. I like the stubs in the header file > > because it reduces the amount of #ifdefs, but this is easy to change. > > > > Stefan > > Okay. > Another option (if you prefer stubs) is to add a stub for access to > s->dataplane field, and surround just the field with ifdefs. > As it is, this code: > if (s->dataplane) { > return; > } > can't be compiled out since compiler is not smart enough to > figure out dataplane is never set. It's okay, I have already implemented your previous suggestion in the v7 patches that I sent out on Friday and I'm okay with it. Stefan
diff --git a/hw/Makefile.objs b/hw/Makefile.objs index d581d8d..cec84bc 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -1,4 +1,4 @@ -common-obj-y = usb/ ide/ +common-obj-y = usb/ ide/ dataplane/ common-obj-y += loader.o common-obj-$(CONFIG_VIRTIO) += virtio-console.o common-obj-$(CONFIG_VIRTIO) += virtio-rng.o diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs new file mode 100644 index 0000000..8c8dea1 --- /dev/null +++ b/hw/dataplane/Makefile.objs @@ -0,0 +1,3 @@ +ifeq ($(CONFIG_VIRTIO), y) +common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o +endif diff --git a/hw/dataplane/hostmem.c b/hw/dataplane/hostmem.c new file mode 100644 index 0000000..d5e1070 --- /dev/null +++ b/hw/dataplane/hostmem.c @@ -0,0 +1,176 @@ +/* + * Thread-safe guest to host memory mapping + * + * Copyright 2012 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "exec-memory.h" +#include "hostmem.h" + +static int hostmem_lookup_cmp(const void *phys_, const void *region_) +{ + hwaddr phys = *(const hwaddr *)phys_; + const HostmemRegion *region = region_; + + if (phys < region->guest_addr) { + return -1; + } else if (phys >= region->guest_addr + region->size) { + return 1; + } else { + return 0; + } +} + +/** + * Map guest physical address to host pointer + */ +void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write) +{ + HostmemRegion *region; + void *host_addr = NULL; + hwaddr offset_within_region; + + qemu_mutex_lock(&hostmem->current_regions_lock); + region = bsearch(&phys, hostmem->current_regions, + hostmem->num_current_regions, + sizeof(hostmem->current_regions[0]), + hostmem_lookup_cmp); + if (!region) { + goto out; + } + if (is_write && region->readonly) { + goto out; + } + offset_within_region = phys - region->guest_addr; + if (offset_within_region + len <= region->size) { + host_addr = region->host_addr + offset_within_region; + } +out: + qemu_mutex_unlock(&hostmem->current_regions_lock); + + return host_addr; +} + +/** + * Install new regions list + */ +static void hostmem_listener_commit(MemoryListener *listener) +{ + Hostmem *hostmem = container_of(listener, Hostmem, listener); + + qemu_mutex_lock(&hostmem->current_regions_lock); + g_free(hostmem->current_regions); + hostmem->current_regions = hostmem->new_regions; + hostmem->num_current_regions = hostmem->num_new_regions; + qemu_mutex_unlock(&hostmem->current_regions_lock); + + /* Reset new regions list */ + hostmem->new_regions = NULL; + hostmem->num_new_regions = 0; +} + +/** + * Add a MemoryRegionSection to the new regions list + */ +static void hostmem_append_new_region(Hostmem *hostmem, + MemoryRegionSection *section) +{ + void *ram_ptr = memory_region_get_ram_ptr(section->mr); + size_t num = hostmem->num_new_regions; + size_t new_size = (num + 1) * sizeof(hostmem->new_regions[0]); + + hostmem->new_regions = 
g_realloc(hostmem->new_regions, new_size); + hostmem->new_regions[num] = (HostmemRegion){ + .host_addr = ram_ptr + section->offset_within_region, + .guest_addr = section->offset_within_address_space, + .size = section->size, + .readonly = section->readonly, + }; + hostmem->num_new_regions++; +} + +static void hostmem_listener_append_region(MemoryListener *listener, + MemoryRegionSection *section) +{ + Hostmem *hostmem = container_of(listener, Hostmem, listener); + + /* Ignore non-RAM regions, we may not be able to map them */ + if (!memory_region_is_ram(section->mr)) { + return; + } + + /* Ignore regions with dirty logging, we cannot mark them dirty */ + if (memory_region_is_logging(section->mr)) { + return; + } + + hostmem_append_new_region(hostmem, section); +} + +/* We don't implement most MemoryListener callbacks, use these nop stubs */ +static void hostmem_listener_dummy(MemoryListener *listener) +{ +} + +static void hostmem_listener_section_dummy(MemoryListener *listener, + MemoryRegionSection *section) +{ +} + +static void hostmem_listener_eventfd_dummy(MemoryListener *listener, + MemoryRegionSection *section, + bool match_data, uint64_t data, + EventNotifier *e) +{ +} + +static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener, + MemoryRegionSection *section, + hwaddr addr, hwaddr len) +{ +} + +void hostmem_init(Hostmem *hostmem) +{ + memset(hostmem, 0, sizeof(*hostmem)); + + qemu_mutex_init(&hostmem->current_regions_lock); + + hostmem->listener = (MemoryListener){ + .begin = hostmem_listener_dummy, + .commit = hostmem_listener_commit, + .region_add = hostmem_listener_append_region, + .region_del = hostmem_listener_section_dummy, + .region_nop = hostmem_listener_append_region, + .log_start = hostmem_listener_section_dummy, + .log_stop = hostmem_listener_section_dummy, + .log_sync = hostmem_listener_section_dummy, + .log_global_start = hostmem_listener_dummy, + .log_global_stop = hostmem_listener_dummy, + .eventfd_add = 
hostmem_listener_eventfd_dummy, + .eventfd_del = hostmem_listener_eventfd_dummy, + .coalesced_mmio_add = hostmem_listener_coalesced_mmio_dummy, + .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy, + .priority = 10, + }; + + memory_listener_register(&hostmem->listener, &address_space_memory); + if (hostmem->num_new_regions > 0) { + hostmem_listener_commit(&hostmem->listener); + } +} + +void hostmem_finalize(Hostmem *hostmem) +{ + memory_listener_unregister(&hostmem->listener); + g_free(hostmem->new_regions); + g_free(hostmem->current_regions); + qemu_mutex_destroy(&hostmem->current_regions_lock); +} diff --git a/hw/dataplane/hostmem.h b/hw/dataplane/hostmem.h new file mode 100644 index 0000000..6d87841 --- /dev/null +++ b/hw/dataplane/hostmem.h @@ -0,0 +1,57 @@ +/* + * Thread-safe guest to host memory mapping + * + * Copyright 2012 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef HOSTMEM_H +#define HOSTMEM_H + +#include "memory.h" +#include "qemu-thread.h" + +typedef struct { + void *host_addr; + hwaddr guest_addr; + uint64_t size; + bool readonly; +} HostmemRegion; + +typedef struct { + /* The listener is invoked when regions change and a new list of regions is + * built up completely before they are installed. + */ + MemoryListener listener; + HostmemRegion *new_regions; + size_t num_new_regions; + + /* Current regions are accessed from multiple threads either to lookup + * addresses or to install a new list of regions. The lock protects the + * pointer and the regions. 
+ */ + QemuMutex current_regions_lock; + HostmemRegion *current_regions; + size_t num_current_regions; +} Hostmem; + +void hostmem_init(Hostmem *hostmem); +void hostmem_finalize(Hostmem *hostmem); + +/** + * Map a guest physical address to a pointer + * + * Note that there is no map/unmap mechanism here. The caller must ensure that + * mapped memory is no longer used across events like hot memory unplug. This + * can be done with other mechanisms like bdrv_drain_all() that quiesce + * in-flight I/O. + */ +void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write);  +#endif /* HOSTMEM_H */
The data plane thread needs to map guest physical addresses to host pointers. Normally this is done with cpu_physical_memory_map() but the function assumes the global mutex is held. The data plane thread does not touch the global mutex and therefore needs a thread-safe memory mapping mechanism. Hostmem registers a MemoryListener similar to how vhost collects and pushes memory region information into the kernel. There is a fine-grained lock on the regions list which is held during lookup and when installing a new regions list. When the physical memory map changes the MemoryListener callbacks are invoked. They build up a new list of memory regions which is finally installed when the list has been completed. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> --- hw/Makefile.objs | 2 +- hw/dataplane/Makefile.objs | 3 + hw/dataplane/hostmem.c | 176 +++++++++++++++++++++++++++++++++++++++++++++ hw/dataplane/hostmem.h | 57 +++++++++++++++ 4 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 hw/dataplane/Makefile.objs create mode 100644 hw/dataplane/hostmem.c create mode 100644 hw/dataplane/hostmem.h