diff mbox

[1/2] exec: Convert bounce buffer to a set

Message ID 1425973810-22831-2-git-send-email-famz@redhat.com
State New
Headers show

Commit Message

Fam Zheng March 10, 2015, 7:50 a.m. UTC
With the introduction of dataplane, the global bounce buffer is neither
thread-safe nor multi-thread friendly. The problems are:

 1) Access to "bounce" is not atomic, thus not safe from dataplane
 thread.

 2) Access to "map_client_list" is not atomic, thus not safe from
 dataplane thread.

 3) In dma_blk_cb, there is a race condition between:

        mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir);
        /* ... */
        cpu_register_map_client(dbs, continue_after_map_failure);

    Bounce may become available after dma_memory_map failed but before
    cpu_register_map_client is called.

 4) The callback registered by cpu_register_map_client is not called in the
    right AioContext.

This patch changes the global bounce to a global set, which is protected
by a mutex. This makes the dma_memory_map never returns NULL in the
bounce buffer path.

The dma helper behavior is changed, so update the ide dma test case
too. The test used to trigger a big number of consecutive dma read (as a
result of disabling "busmaster"), which will put the IDE device to "intr
| active" status. After this patch, the dma read is merged to a very big
SG (niov=8192), which results in -EINVAL in the final preadv(2). In this
case, ide ERR bit is set.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 exec.c           | 77 +++++++++++++++++++++++++++++++++++++++-----------------
 tests/ide-test.c |  7 ++++--
 2 files changed, 59 insertions(+), 25 deletions(-)

Comments

Paolo Bonzini March 10, 2015, 11:14 a.m. UTC | #1
On 10/03/2015 08:50, Fam Zheng wrote:
> +    QLIST_ENTRY(BounceBuffer) next;

Where is this used?

> -    if (buffer != bounce.buffer) {
> +    BounceBuffer *bounce;
> +
> +    bounce = bounce_buffer_find_and_remove(buffer);
> +    if (!bounce) {

I'm afraid that this adds a mutex lock/unlock pair and a hash table
lookup on a very hot path.  One possibility is to add a check that the
hash table is not empty in bounce_buffer_find_and_remove.  That can be
done outside the lock so it's fast.

I'm also wondering if it's okay to let the guest do arbitrarily large
memory allocations (e.g. DMA from a huge unassigned memory area above
guest RAM); effectively you're disabling the "/* Avoid unbounded
allocations */" safety guard.

Is it hard to do this while keeping map_clients?

Paolo
Fam Zheng March 11, 2015, 4:57 a.m. UTC | #2
On Tue, 03/10 12:14, Paolo Bonzini wrote:
> 
> 
> On 10/03/2015 08:50, Fam Zheng wrote:
> > +    QLIST_ENTRY(BounceBuffer) next;
> 
> Where is this used?

Unused, I will remove this.

> 
> > -    if (buffer != bounce.buffer) {
> > +    BounceBuffer *bounce;
> > +
> > +    bounce = bounce_buffer_find_and_remove(buffer);
> > +    if (!bounce) {
> 
> I'm afraid that this adds a mutex lock/unlock pair and a hash table
> lookup on a very hot path.  One possibility is to add a check that the
> hash table is not empty in bounce_buffer_find_and_remove.  That can be
> done outside the lock so it's fast.
> 
> I'm also wondering if it's okay to let the guest do arbitrarily large
> memory allocations (e.g. DMA from a huge unassigned memory area above
> guest RAM); effectively you're disabling the "/* Avoid unbounded
> allocations */" safety guard.

This is a good point, it is dangerous to allow that.

> 
> Is it hard to do this while keeping map_clients?

We might need to keep map_client for above reason. I'll take another look into
it.

Fam
diff mbox

Patch

diff --git a/exec.c b/exec.c
index 6a5adab..5943cc2 100644
--- a/exec.c
+++ b/exec.c
@@ -94,6 +94,42 @@  DEFINE_TLS(CPUState *, current_cpu);
    2 = Adaptive rate instruction counting.  */
 int use_icount;
 
+typedef struct BounceBuffer {
+    MemoryRegion *mr;
+    void *buffer;
+    hwaddr addr;
+    hwaddr len;
+    QLIST_ENTRY(BounceBuffer) next;
+} BounceBuffer;
+
+static QemuMutex bounce_lock;
+static GHashTable *bounce_buffers;
+
+static BounceBuffer *bounce_buffer_find_and_remove(void *buffer)
+{
+    BounceBuffer *bounce;
+
+    qemu_mutex_lock(&bounce_lock);
+    bounce = g_hash_table_lookup(bounce_buffers, buffer);
+    if (bounce) {
+        g_hash_table_remove(bounce_buffers, buffer);
+    }
+    qemu_mutex_unlock(&bounce_lock);
+    return bounce;
+}
+
+static inline void bounce_buffer_insert(BounceBuffer *bounce)
+{
+    qemu_mutex_lock(&bounce_lock);
+    g_hash_table_insert(bounce_buffers, bounce->buffer, bounce);
+    qemu_mutex_unlock(&bounce_lock);
+}
+
+static guint bounce_buffer_hash(gconstpointer key)
+{
+    return *(long *)key >> 4;
+}
+
 #if !defined(CONFIG_USER_ONLY)
 
 typedef struct PhysPageEntry PhysPageEntry;
@@ -433,6 +469,8 @@  void cpu_exec_init_all(void)
     memory_map_init();
     io_mem_init();
 #endif
+    qemu_mutex_init(&bounce_lock);
+    bounce_buffers = g_hash_table_new(bounce_buffer_hash, NULL);
 }
 
 #if !defined(CONFIG_USER_ONLY)
@@ -2475,15 +2513,6 @@  void cpu_flush_icache_range(hwaddr start, int len)
                                            start, NULL, len, FLUSH_CACHE);
 }
 
-typedef struct {
-    MemoryRegion *mr;
-    void *buffer;
-    hwaddr addr;
-    hwaddr len;
-} BounceBuffer;
-
-static BounceBuffer bounce;
-
 typedef struct MapClient {
     void *opaque;
     void (*callback)(void *opaque);
@@ -2568,23 +2597,22 @@  void *address_space_map(AddressSpace *as,
     l = len;
     mr = address_space_translate(as, addr, &xlat, &l, is_write);
     if (!memory_access_is_direct(mr, is_write)) {
-        if (bounce.buffer) {
-            return NULL;
-        }
+        BounceBuffer *bounce = g_new(BounceBuffer, 1);
         /* Avoid unbounded allocations */
         l = MIN(l, TARGET_PAGE_SIZE);
-        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
-        bounce.addr = addr;
-        bounce.len = l;
+        bounce->buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
+        bounce->addr = addr;
+        bounce->len = l;
 
         memory_region_ref(mr);
-        bounce.mr = mr;
+        bounce->mr = mr;
         if (!is_write) {
-            address_space_read(as, addr, bounce.buffer, l);
+            address_space_read(as, addr, bounce->buffer, l);
         }
 
         *plen = l;
-        return bounce.buffer;
+        bounce_buffer_insert(bounce);
+        return bounce->buffer;
     }
 
     base = xlat;
@@ -2617,7 +2645,10 @@  void *address_space_map(AddressSpace *as,
 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                          int is_write, hwaddr access_len)
 {
-    if (buffer != bounce.buffer) {
+    BounceBuffer *bounce;
+
+    bounce = bounce_buffer_find_and_remove(buffer);
+    if (!bounce) {
         MemoryRegion *mr;
         ram_addr_t addr1;
 
@@ -2633,11 +2664,11 @@  void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
         return;
     }
     if (is_write) {
-        address_space_write(as, bounce.addr, bounce.buffer, access_len);
+        address_space_write(as, bounce->addr, bounce->buffer, access_len);
     }
-    qemu_vfree(bounce.buffer);
-    bounce.buffer = NULL;
-    memory_region_unref(bounce.mr);
+    qemu_vfree(bounce->buffer);
+    memory_region_unref(bounce->mr);
+    g_free(bounce);
     cpu_notify_map_clients();
 }
 
diff --git a/tests/ide-test.c b/tests/ide-test.c
index 29f4039..5eb81ec 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -366,6 +366,7 @@  static void test_bmdma_long_prdt(void)
 static void test_bmdma_no_busmaster(void)
 {
     uint8_t status;
+    uint8_t rs;
 
     /* No PRDT_EOT, each entry addr 0/size 64k, and in theory qemu shouldn't be
      * able to access it anyway because the Bus Master bit in the PCI command
@@ -378,8 +379,10 @@  static void test_bmdma_no_busmaster(void)
 
     /* Not entirely clear what the expected result is, but this is what we get
      * in practice. At least we want to be aware of any changes. */
-    g_assert_cmphex(status, ==, BM_STS_ACTIVE | BM_STS_INTR);
-    assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR);
+    g_assert_cmphex(status, ==, BM_STS_INTR);
+    rs = inb(IDE_BASE + reg_status);
+    assert_bit_clear(rs, DF);
+    assert_bit_set(rs, ERR);
 }
 
 static void test_bmdma_setup(void)