Patchwork [RFC,13/13] RCUify ram_list

login
register
mail settings
Submitter Paolo Bonzini
Date Aug. 15, 2011, 9:08 p.m.
Message ID <1313442520-12062-14-git-send-email-pbonzini@redhat.com>
Download mbox | patch
Permalink /patch/110097/
State New
Headers show

Comments

Paolo Bonzini - Aug. 15, 2011, 9:08 p.m.
Incomplete because the users of qemu_get_ram_ptr should be wrapped
with rcu_read_lock/rcu_read_unlock.  Happens to work because those
are nops anyway. :)

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch_init.c |   14 +++++++
 cpu-all.h   |    4 ++
 exec.c      |  124 ++++++++++++++++++++++++++++++++++++++++------------------
 3 files changed, 103 insertions(+), 39 deletions(-)

Patch

diff --git a/arch_init.c b/arch_init.c
index 484b39d..f5a567b 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -117,6 +117,7 @@  static int ram_save_block(QEMUFile *f)
     ram_addr_t current_addr;
     int bytes_sent = 0;
 
+    rcu_read_lock();
     if (!block)
         block = QLIST_FIRST(&ram_list.blocks);
 
@@ -167,6 +168,7 @@  static int ram_save_block(QEMUFile *f)
         current_addr = block->offset + offset;
 
     } while (current_addr != last_block->offset + last_offset);
+    rcu_read_unlock();
 
     last_block = block;
     last_offset = offset;
@@ -181,6 +183,7 @@  static ram_addr_t ram_save_remaining(void)
     RAMBlock *block;
     ram_addr_t count = 0;
 
+    rcu_read_lock();
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         ram_addr_t addr;
         for (addr = block->offset; addr < block->offset + block->length;
@@ -190,6 +193,7 @@  static ram_addr_t ram_save_remaining(void)
             }
         }
     }
+    rcu_read_unlock();
 
     return count;
 }
@@ -209,8 +213,10 @@  uint64_t ram_bytes_total(void)
     RAMBlock *block;
     uint64_t total = 0;
 
+    rcu_read_lock();
     QLIST_FOREACH(block, &ram_list.blocks, next)
         total += block->length;
+    rcu_read_unlock();
 
     return total;
 }
@@ -232,6 +238,7 @@  static void sort_ram_list(void)
     RAMBlock *block, *nblock, **blocks;
     int n;
     n = 0;
+    //qemu_mutex_lock(&ram_list.mutex);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         ++n;
     }
@@ -245,6 +252,7 @@  static void sort_ram_list(void)
     while (--n >= 0) {
         QLIST_INSERT_HEAD(&ram_list.blocks, blocks[n], next);
     }
+    //qemu_mutex_unlock(&ram_list.mutex);
     qemu_free(blocks);
 }
 
@@ -273,6 +281,7 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         sort_ram_list();
 
         /* Make sure all dirty bits are set */
+	rcu_read_lock();
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = block->offset; addr < block->offset + block->length;
                  addr += TARGET_PAGE_SIZE) {
@@ -282,17 +291,20 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
                 }
             }
         }
+	rcu_read_unlock();
 
         /* Enable dirty memory tracking */
         cpu_physical_memory_set_dirty_tracking(1);
 
         qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
 
+	rcu_read_lock();
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             qemu_put_byte(f, strlen(block->idstr));
             qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
             qemu_put_be64(f, block->length);
         }
+	rcu_read_unlock();
     }
 
     bytes_transferred_last = bytes_transferred;
@@ -374,6 +386,7 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
         return -EINVAL;
     }
 
+    rcu_read_lock();
     do {
         addr = qemu_get_be64(f);
 
@@ -453,6 +466,7 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
             return -EIO;
         }
     } while (!(flags & RAM_SAVE_FLAG_EOS));
+    rcu_read_unlock();
 
     return 0;
 }
diff --git a/cpu-all.h b/cpu-all.h
index 083d9e6..7ed3f75 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -21,6 +21,7 @@ 
 
 #include "qemu-common.h"
 #include "cpu-common.h"
+#include "rcu.h"
 
 /* some important defines:
  *
@@ -475,11 +476,14 @@  extern ram_addr_t ram_size;
 #define RAM_PREALLOC_MASK   (1 << 0)
 
 typedef struct RAMBlock {
+    struct rcu_head h;
     uint8_t *host;
     ram_addr_t offset;
     ram_addr_t length;
     uint32_t flags;
+    // should be protected by its own lock + RCU on the read side
     QLIST_ENTRY(RAMBlock) next;
+    // protected by the iothread lock + RCU on the read side
     QLIST_ENTRY(RAMBlock) next_mru;
     char idstr[256];
 #if defined(__linux__) && !defined(TARGET_S390X)
diff --git a/exec.c b/exec.c
index d25e3cc..6a7cec7 100644
--- a/exec.c
+++ b/exec.c
@@ -2935,6 +2935,7 @@  ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
     }
     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
 
+    //qemu_mutex_lock(&ram_list.mutex);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (!strcmp(block->idstr, new_block->idstr)) {
             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
@@ -2986,6 +2987,7 @@  ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
     new_block->length = size;
 
     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+    //qemu_mutex_unlock(&ram_list.mutex);
     QLIST_INSERT_HEAD(&ram_list.blocks_mru, new_block, next_mru);
 
     ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
@@ -3008,52 +3010,62 @@  void qemu_ram_free_from_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    //qemu_mutex_lock(&ram_list.mutex);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QLIST_REMOVE(block, next);
             QLIST_REMOVE(block, next_mru);
-            qemu_free(block);
+            call_rcu(&block->h, rcu_free);
             return;
         }
     }
+    //qemu_mutex_unlock(&ram_list.mutex);
+}
+
+static void rcu_free_block(struct rcu_head *h)
+{
+    RAMBlock *block = container_of(h, RAMBlock, h);
+    if (block->flags & RAM_PREALLOC_MASK) {
+        ;
+    } else if (mem_path) {
+#if defined (__linux__) && !defined(TARGET_S390X)
+        if (block->fd) {
+            munmap(block->host, block->length);
+            close(block->fd);
+        } else {
+            qemu_vfree(block->host);
+        }
+#else
+        abort();
+#endif
+    } else {
+#if defined(TARGET_S390X) && defined(CONFIG_KVM)
+        munmap(block->host, block->length);
+#else
+        if (xen_enabled()) {
+            xen_invalidate_map_cache_entry(block->host);
+        } else {
+            qemu_vfree(block->host);
+        }
+#endif
+    }
+    qemu_free(block);
 }
 
 void qemu_ram_free(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    //qemu_mutex_lock(&ram_list.mutex);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QLIST_REMOVE(block, next);
             QLIST_REMOVE(block, next_mru);
-            if (block->flags & RAM_PREALLOC_MASK) {
-                ;
-            } else if (mem_path) {
-#if defined (__linux__) && !defined(TARGET_S390X)
-                if (block->fd) {
-                    munmap(block->host, block->length);
-                    close(block->fd);
-                } else {
-                    qemu_vfree(block->host);
-                }
-#else
-                abort();
-#endif
-            } else {
-#if defined(TARGET_S390X) && defined(CONFIG_KVM)
-                munmap(block->host, block->length);
-#else
-                if (xen_enabled()) {
-                    xen_invalidate_map_cache_entry(block->host);
-                } else {
-                    qemu_vfree(block->host);
-                }
-#endif
-            }
-            qemu_free(block);
+            call_rcu(&block->h, rcu_free_block);
             return;
         }
     }
+    //qemu_mutex_unlock(&ram_list.mutex);
 
 }
 
@@ -3065,6 +3077,7 @@  void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
     int flags;
     void *area, *vaddr;
 
+    rcu_read_lock();
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         offset = addr - block->offset;
         if (offset < block->length) {
@@ -3072,8 +3085,9 @@  void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
             if (block->flags & RAM_PREALLOC_MASK) {
                 ;
             } else {
+		/* No need to munmap the memory, mmap will discard the old mapping
+		 * atomically.  */
                 flags = MAP_FIXED;
-                munmap(vaddr, length);
                 if (mem_path) {
 #if defined(__linux__) && !defined(TARGET_S390X)
                     if (block->fd) {
@@ -3112,9 +3126,10 @@  void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
                 }
                 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
             }
-            return;
+            break;
         }
     }
+    rcu_read_unlock();
 }
 #endif /* !_WIN32 */
 
@@ -3128,8 +3143,16 @@  void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
  */
 void *qemu_get_ram_ptr(ram_addr_t addr)
 {
+    uint8_t *p = NULL;
     RAMBlock *block;
 
+    /* RCU protects the "existence" of the blocks, the iothread lock
+     * protects the next_mru chain.  This rcu_read_lock() is most
+     * likely nested, since the caller probably wants to do something
+     * with the result as well!  FIXME: this is not done anywhere yet.
+     * Due to our RCU implementation we can avoid that, but it's not
+     * clean. */
+    rcu_read_lock();
     QLIST_FOREACH(block, &ram_list.blocks_mru, next_mru) {
         if (addr - block->offset < block->length) {
             /* Move this entry to to start of the list.  */
@@ -3143,20 +3166,24 @@  void *qemu_get_ram_ptr(ram_addr_t addr)
                  * In that case just map until the end of the page.
                  */
                 if (block->offset == 0) {
-                    return xen_map_cache(addr, 0, 0);
+                    p = xen_map_cache(addr, 0, 0);
+		    break;
                 } else if (block->host == NULL) {
                     block->host =
                         xen_map_cache(block->offset, block->length, 1);
                 }
             }
-            return block->host + (addr - block->offset);
+	    p = block->host + (addr - block->offset);
+            break;
         }
     }
+    rcu_read_unlock();
 
-    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
-    abort();
-
-    return NULL;
+    if (!p) {
+        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
+        abort();
+    }
+    return p;
 }
 
 /* Return a host pointer to ram allocated with qemu_ram_alloc.
@@ -3164,8 +3191,10 @@  void *qemu_get_ram_ptr(ram_addr_t addr)
  */
 void *qemu_safe_ram_ptr(ram_addr_t addr)
 {
+    uint8_t *p = NULL;
     RAMBlock *block;
 
+    rcu_read_lock();
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr - block->offset < block->length) {
             if (xen_enabled()) {
@@ -3174,20 +3203,25 @@  void *qemu_safe_ram_ptr(ram_addr_t addr)
                  * In that case just map until the end of the page.
                  */
                 if (block->offset == 0) {
-                    return xen_map_cache(addr, 0, 0);
+                    p = xen_map_cache(addr, 0, 0);
+		    break;
                 } else if (block->host == NULL) {
                     block->host =
                         xen_map_cache(block->offset, block->length, 1);
                 }
             }
-            return block->host + (addr - block->offset);
+            p = block->host + (addr - block->offset);
+	    break;
         }
     }
+    rcu_read_unlock();
 
-    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
-    abort();
+    if (!p) {
+        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
+        abort();
+    }
 
-    return NULL;
+    return p;
 }
 
 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
@@ -3202,13 +3236,16 @@  void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
     } else {
         RAMBlock *block;
 
+	rcu_read_lock();
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             if (addr - block->offset < block->length) {
                 if (addr - block->offset + *size > block->length)
                     *size = block->length - addr + block->offset;
+	        rcu_read_unlock();
                 return block->host + (addr - block->offset);
             }
         }
+	rcu_read_unlock();
 
         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
         abort();
@@ -3230,6 +3267,13 @@  int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         return 0;
     }
 
+    /* RCU protects the "existence" of the blocks, the iothread lock
+     * protects the next_mru chain.  This rcu_read_lock() is most
+     * likely nested, since the caller probably wants to do something
+     * with the result as well!  FIXME: this is not done anywhere yet.
+     * Due to our RCU implementation we can avoid that, but it's not
+     * clean. */
+    rcu_read_lock();
     QLIST_FOREACH(block, &ram_list.blocks_mru, next_mru) {
         /* This case append when the block is not mapped. */
         if (block->host == NULL) {
@@ -3237,9 +3281,11 @@  int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         }
         if (host - block->host < block->length) {
             *ram_addr = block->offset + (host - block->host);
+	    rcu_read_unlock();
             return 0;
         }
     }
+    rcu_read_unlock();
 
     return -1;
 }