Patchwork [RFC,v1,2/3] ramlist: apply fine grain lock for ram_list

login
register
mail settings
Submitter pingfan liu
Date Nov. 9, 2012, 3:14 a.m.
Message ID <1352430870-10129-3-git-send-email-qemulist@gmail.com>
Download mbox | patch
Permalink /patch/197920/
State New
Headers show

Comments

pingfan liu - Nov. 9, 2012, 3:14 a.m.
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 cpu-all.h |    1 +
 exec.c    |   46 +++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 40 insertions(+), 7 deletions(-)
Paolo Bonzini - Nov. 10, 2012, 1:54 a.m.
Il 09/11/2012 04:14, Liu Ping Fan ha scritto:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  cpu-all.h |    1 +
>  exec.c    |   46 +++++++++++++++++++++++++++++++++++++++-------
>  2 files changed, 40 insertions(+), 7 deletions(-)

The problem here is that the ram_list is a pretty critical bit for TCG.

The migration thread series has patches that split the list in two: a
MRU-accessed list that uses the BQL, and another that uses a separate lock.

address_space_map could use the latter list.  In order to improve
performance further, we could sort the list from the biggest to the
smallest region, like KVM does in the kernel.

Paolo

> diff --git a/cpu-all.h b/cpu-all.h
> index 6aa7e58..d3ead99 100644
> --- a/cpu-all.h
> +++ b/cpu-all.h
> @@ -498,6 +498,7 @@ typedef struct RAMBlock {
>  } RAMBlock;
>  
>  typedef struct RAMList {
> +    QemuMutex lock;
>      uint8_t *phys_dirty;
>      QLIST_HEAD(, RAMBlock) blocks;
>  } RAMList;
> diff --git a/exec.c b/exec.c
> index fe84718..e5f1c0f 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2444,6 +2444,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
>      if (QLIST_EMPTY(&ram_list.blocks))
>          return 0;
>  
> +    qemu_mutex_lock(&ram_list.lock);
>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>          ram_addr_t end, next = RAM_ADDR_MAX;
>  
> @@ -2459,6 +2460,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
>              mingap = next - end;
>          }
>      }
> +    qemu_mutex_unlock(&ram_list.lock);
>  
>      if (offset == RAM_ADDR_MAX) {
>          fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
> @@ -2474,8 +2476,10 @@ ram_addr_t last_ram_offset(void)
>      RAMBlock *block;
>      ram_addr_t last = 0;
>  
> +    qemu_mutex_lock(&ram_list.lock);
>      QLIST_FOREACH(block, &ram_list.blocks, next)
>          last = MAX(last, block->offset + block->length);
> +    qemu_mutex_unlock(&ram_list.lock);
>  
>      return last;
>  }
> @@ -2503,6 +2507,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
>      RAMBlock *new_block, *block;
>  
>      new_block = NULL;
> +    qemu_mutex_lock(&ram_list.lock);
>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>          if (block->offset == addr) {
>              new_block = block;
> @@ -2528,6 +2533,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
>              abort();
>          }
>      }
> +    qemu_mutex_unlock(&ram_list.lock);
>  }
>  
>  static int memory_try_enable_merging(void *addr, size_t len)
> @@ -2582,12 +2588,6 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>      }
>      new_block->length = size;
>  
> -    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
> -
> -    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
> -                                       last_ram_offset() >> TARGET_PAGE_BITS);
> -    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
> -           0, size >> TARGET_PAGE_BITS);
>      cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
>  
>      qemu_ram_setup_dump(new_block->host, size);
> @@ -2596,6 +2596,15 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>      if (kvm_enabled())
>          kvm_setup_guest_memory(new_block->host, size);
>  
> +    qemu_mutex_lock(&ram_list.lock);
> +    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
> +
> +    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
> +                                       last_ram_offset() >> TARGET_PAGE_BITS);
> +    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
> +           0, size >> TARGET_PAGE_BITS);
> +    qemu_mutex_unlock(&ram_list.lock);
> +
>      return new_block->offset;
>  }
>  
> @@ -2608,19 +2617,23 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
>  {
>      RAMBlock *block;
>  
> +    qemu_mutex_lock(&ram_list.lock);
>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>          if (addr == block->offset) {
>              QLIST_REMOVE(block, next);
>              g_free(block);
> +            qemu_mutex_unlock(&ram_list.lock);
>              return;
>          }
>      }
> +    qemu_mutex_unlock(&ram_list.lock);
>  }
>  
>  void qemu_ram_free(ram_addr_t addr)
>  {
>      RAMBlock *block;
>  
> +    qemu_mutex_lock(&ram_list.lock);
>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>          if (addr == block->offset) {
>              QLIST_REMOVE(block, next);
> @@ -2649,10 +2662,11 @@ void qemu_ram_free(ram_addr_t addr)
>  #endif
>              }
>              g_free(block);
> +            qemu_mutex_unlock(&ram_list.lock);
>              return;
>          }
>      }
> -
> +    qemu_mutex_unlock(&ram_list.lock);
>  }
>  
>  #ifndef _WIN32
> @@ -2663,6 +2677,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
>      int flags;
>      void *area, *vaddr;
>  
> +    qemu_mutex_lock(&ram_list.lock);
>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>          offset = addr - block->offset;
>          if (offset < block->length) {
> @@ -2711,9 +2726,11 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
>                  memory_try_enable_merging(vaddr, length);
>                  qemu_ram_setup_dump(vaddr, length);
>              }
> +            qemu_mutex_unlock(&ram_list.lock);
>              return;
>          }
>      }
> +    qemu_mutex_unlock(&ram_list.lock);
>  }
>  #endif /* !_WIN32 */
>  
> @@ -2729,6 +2746,7 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
>  {
>      RAMBlock *block;
>  
> +    qemu_mutex_lock(&ram_list.lock);
>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>          if (addr - block->offset < block->length) {
>              /* Move this entry to to start of the list.  */
> @@ -2742,15 +2760,18 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
>                   * In that case just map until the end of the page.
>                   */
>                  if (block->offset == 0) {
> +                    qemu_mutex_unlock(&ram_list.lock);
>                      return xen_map_cache(addr, 0, 0);
>                  } else if (block->host == NULL) {
>                      block->host =
>                          xen_map_cache(block->offset, block->length, 1);
>                  }
>              }
> +            qemu_mutex_unlock(&ram_list.lock);
>              return block->host + (addr - block->offset);
>          }
>      }
> +    qemu_mutex_unlock(&ram_list.lock);
>  
>      fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
>      abort();
> @@ -2765,6 +2786,7 @@ void *qemu_safe_ram_ptr(ram_addr_t addr)
>  {
>      RAMBlock *block;
>  
> +    qemu_mutex_lock(&ram_list.lock);
>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>          if (addr - block->offset < block->length) {
>              if (xen_enabled()) {
> @@ -2773,15 +2795,18 @@ void *qemu_safe_ram_ptr(ram_addr_t addr)
>                   * In that case just map until the end of the page.
>                   */
>                  if (block->offset == 0) {
> +                    qemu_mutex_unlock(&ram_list.lock);
>                      return xen_map_cache(addr, 0, 0);
>                  } else if (block->host == NULL) {
>                      block->host =
>                          xen_map_cache(block->offset, block->length, 1);
>                  }
>              }
> +            qemu_mutex_unlock(&ram_list.lock);
>              return block->host + (addr - block->offset);
>          }
>      }
> +    qemu_mutex_unlock(&ram_list.lock);
>  
>      fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
>      abort();
> @@ -2801,13 +2826,16 @@ void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
>      } else {
>          RAMBlock *block;
>  
> +        qemu_mutex_lock(&ram_list.lock);
>          QLIST_FOREACH(block, &ram_list.blocks, next) {
>              if (addr - block->offset < block->length) {
>                  if (addr - block->offset + *size > block->length)
>                      *size = block->length - addr + block->offset;
> +                qemu_mutex_unlock(&ram_list.lock);
>                  return block->host + (addr - block->offset);
>              }
>          }
> +        qemu_mutex_unlock(&ram_list.lock);
>  
>          fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
>          abort();
> @@ -2829,6 +2857,7 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
>          return 0;
>      }
>  
> +    qemu_mutex_lock(&ram_list.lock);
>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>          /* This case append when the block is not mapped. */
>          if (block->host == NULL) {
> @@ -2836,9 +2865,11 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
>          }
>          if (host - block->host < block->length) {
>              *ram_addr = block->offset + (host - block->host);
> +            qemu_mutex_unlock(&ram_list.lock);
>              return 0;
>          }
>      }
> +    qemu_mutex_unlock(&ram_list.lock);
>  
>      return -1;
>  }
> @@ -3318,6 +3349,7 @@ static void memory_map_init(void)
>      address_space_io.name = "I/O";
>  
>      qemu_mutex_init(&bounce.lock);
> +    qemu_mutex_init(&ram_list.lock);
>  
>      memory_listener_register(&core_memory_listener, &address_space_memory);
>      memory_listener_register(&io_memory_listener, &address_space_io);
>
pingfan liu - Nov. 12, 2012, 6:22 a.m.
On Sat, Nov 10, 2012 at 9:54 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 09/11/2012 04:14, Liu Ping Fan ha scritto:
>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>
>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>> ---
>>  cpu-all.h |    1 +
>>  exec.c    |   46 +++++++++++++++++++++++++++++++++++++++-------
>>  2 files changed, 40 insertions(+), 7 deletions(-)
>
> The problem here is that the ram_list is a pretty critical bit for TCG.
>
This patch does not touch the MRU, so you mean the expense of lock? If
it is, I think, we can define empty lock ops for TCG, and later, when
we adopt urcu, we can come back to fix the TCG problem.
> The migration thread series has patches that split the list in two: a
> MRU-accessed list that uses the BQL, and another that uses a separate lock.
>
I read the thread, but I think we can not protect RAMBlock w/o a
unified lock.  When ram device's refcnt->0, and call
qemu_ram_free_from_ptr(), it can be with/without QBL.  So we need to
sync the two lists: ->blocks and ->blocks_mru on the same lock,
otherwise, a destroyed RAMBlock will be exposed.

Thanks and regards,
Pingfan
> address_space_map could use the latter list.  In order to improve
> performance further, we could sort the list from the biggest to the
> smallest region, like KVM does in the kernel.
>
> Paolo
>
>> diff --git a/cpu-all.h b/cpu-all.h
>> index 6aa7e58..d3ead99 100644
>> --- a/cpu-all.h
>> +++ b/cpu-all.h
>> @@ -498,6 +498,7 @@ typedef struct RAMBlock {
>>  } RAMBlock;
>>
>>  typedef struct RAMList {
>> +    QemuMutex lock;
>>      uint8_t *phys_dirty;
>>      QLIST_HEAD(, RAMBlock) blocks;
>>  } RAMList;
>> diff --git a/exec.c b/exec.c
>> index fe84718..e5f1c0f 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -2444,6 +2444,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
>>      if (QLIST_EMPTY(&ram_list.blocks))
>>          return 0;
>>
>> +    qemu_mutex_lock(&ram_list.lock);
>>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>>          ram_addr_t end, next = RAM_ADDR_MAX;
>>
>> @@ -2459,6 +2460,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
>>              mingap = next - end;
>>          }
>>      }
>> +    qemu_mutex_unlock(&ram_list.lock);
>>
>>      if (offset == RAM_ADDR_MAX) {
>>          fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
>> @@ -2474,8 +2476,10 @@ ram_addr_t last_ram_offset(void)
>>      RAMBlock *block;
>>      ram_addr_t last = 0;
>>
>> +    qemu_mutex_lock(&ram_list.lock);
>>      QLIST_FOREACH(block, &ram_list.blocks, next)
>>          last = MAX(last, block->offset + block->length);
>> +    qemu_mutex_unlock(&ram_list.lock);
>>
>>      return last;
>>  }
>> @@ -2503,6 +2507,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
>>      RAMBlock *new_block, *block;
>>
>>      new_block = NULL;
>> +    qemu_mutex_lock(&ram_list.lock);
>>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>>          if (block->offset == addr) {
>>              new_block = block;
>> @@ -2528,6 +2533,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
>>              abort();
>>          }
>>      }
>> +    qemu_mutex_unlock(&ram_list.lock);
>>  }
>>
>>  static int memory_try_enable_merging(void *addr, size_t len)
>> @@ -2582,12 +2588,6 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>>      }
>>      new_block->length = size;
>>
>> -    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
>> -
>> -    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
>> -                                       last_ram_offset() >> TARGET_PAGE_BITS);
>> -    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
>> -           0, size >> TARGET_PAGE_BITS);
>>      cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
>>
>>      qemu_ram_setup_dump(new_block->host, size);
>> @@ -2596,6 +2596,15 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>>      if (kvm_enabled())
>>          kvm_setup_guest_memory(new_block->host, size);
>>
>> +    qemu_mutex_lock(&ram_list.lock);
>> +    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
>> +
>> +    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
>> +                                       last_ram_offset() >> TARGET_PAGE_BITS);
>> +    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
>> +           0, size >> TARGET_PAGE_BITS);
>> +    qemu_mutex_unlock(&ram_list.lock);
>> +
>>      return new_block->offset;
>>  }
>>
>> @@ -2608,19 +2617,23 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
>>  {
>>      RAMBlock *block;
>>
>> +    qemu_mutex_lock(&ram_list.lock);
>>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>>          if (addr == block->offset) {
>>              QLIST_REMOVE(block, next);
>>              g_free(block);
>> +            qemu_mutex_unlock(&ram_list.lock);
>>              return;
>>          }
>>      }
>> +    qemu_mutex_unlock(&ram_list.lock);
>>  }
>>
>>  void qemu_ram_free(ram_addr_t addr)
>>  {
>>      RAMBlock *block;
>>
>> +    qemu_mutex_lock(&ram_list.lock);
>>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>>          if (addr == block->offset) {
>>              QLIST_REMOVE(block, next);
>> @@ -2649,10 +2662,11 @@ void qemu_ram_free(ram_addr_t addr)
>>  #endif
>>              }
>>              g_free(block);
>> +            qemu_mutex_unlock(&ram_list.lock);
>>              return;
>>          }
>>      }
>> -
>> +    qemu_mutex_unlock(&ram_list.lock);
>>  }
>>
>>  #ifndef _WIN32
>> @@ -2663,6 +2677,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
>>      int flags;
>>      void *area, *vaddr;
>>
>> +    qemu_mutex_lock(&ram_list.lock);
>>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>>          offset = addr - block->offset;
>>          if (offset < block->length) {
>> @@ -2711,9 +2726,11 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
>>                  memory_try_enable_merging(vaddr, length);
>>                  qemu_ram_setup_dump(vaddr, length);
>>              }
>> +            qemu_mutex_unlock(&ram_list.lock);
>>              return;
>>          }
>>      }
>> +    qemu_mutex_unlock(&ram_list.lock);
>>  }
>>  #endif /* !_WIN32 */
>>
>> @@ -2729,6 +2746,7 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
>>  {
>>      RAMBlock *block;
>>
>> +    qemu_mutex_lock(&ram_list.lock);
>>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>>          if (addr - block->offset < block->length) {
>>              /* Move this entry to to start of the list.  */
>> @@ -2742,15 +2760,18 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
>>                   * In that case just map until the end of the page.
>>                   */
>>                  if (block->offset == 0) {
>> +                    qemu_mutex_unlock(&ram_list.lock);
>>                      return xen_map_cache(addr, 0, 0);
>>                  } else if (block->host == NULL) {
>>                      block->host =
>>                          xen_map_cache(block->offset, block->length, 1);
>>                  }
>>              }
>> +            qemu_mutex_unlock(&ram_list.lock);
>>              return block->host + (addr - block->offset);
>>          }
>>      }
>> +    qemu_mutex_unlock(&ram_list.lock);
>>
>>      fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
>>      abort();
>> @@ -2765,6 +2786,7 @@ void *qemu_safe_ram_ptr(ram_addr_t addr)
>>  {
>>      RAMBlock *block;
>>
>> +    qemu_mutex_lock(&ram_list.lock);
>>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>>          if (addr - block->offset < block->length) {
>>              if (xen_enabled()) {
>> @@ -2773,15 +2795,18 @@ void *qemu_safe_ram_ptr(ram_addr_t addr)
>>                   * In that case just map until the end of the page.
>>                   */
>>                  if (block->offset == 0) {
>> +                    qemu_mutex_unlock(&ram_list.lock);
>>                      return xen_map_cache(addr, 0, 0);
>>                  } else if (block->host == NULL) {
>>                      block->host =
>>                          xen_map_cache(block->offset, block->length, 1);
>>                  }
>>              }
>> +            qemu_mutex_unlock(&ram_list.lock);
>>              return block->host + (addr - block->offset);
>>          }
>>      }
>> +    qemu_mutex_unlock(&ram_list.lock);
>>
>>      fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
>>      abort();
>> @@ -2801,13 +2826,16 @@ void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
>>      } else {
>>          RAMBlock *block;
>>
>> +        qemu_mutex_lock(&ram_list.lock);
>>          QLIST_FOREACH(block, &ram_list.blocks, next) {
>>              if (addr - block->offset < block->length) {
>>                  if (addr - block->offset + *size > block->length)
>>                      *size = block->length - addr + block->offset;
>> +                qemu_mutex_unlock(&ram_list.lock);
>>                  return block->host + (addr - block->offset);
>>              }
>>          }
>> +        qemu_mutex_unlock(&ram_list.lock);
>>
>>          fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
>>          abort();
>> @@ -2829,6 +2857,7 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
>>          return 0;
>>      }
>>
>> +    qemu_mutex_lock(&ram_list.lock);
>>      QLIST_FOREACH(block, &ram_list.blocks, next) {
>>          /* This case append when the block is not mapped. */
>>          if (block->host == NULL) {
>> @@ -2836,9 +2865,11 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
>>          }
>>          if (host - block->host < block->length) {
>>              *ram_addr = block->offset + (host - block->host);
>> +            qemu_mutex_unlock(&ram_list.lock);
>>              return 0;
>>          }
>>      }
>> +    qemu_mutex_unlock(&ram_list.lock);
>>
>>      return -1;
>>  }
>> @@ -3318,6 +3349,7 @@ static void memory_map_init(void)
>>      address_space_io.name = "I/O";
>>
>>      qemu_mutex_init(&bounce.lock);
>> +    qemu_mutex_init(&ram_list.lock);
>>
>>      memory_listener_register(&core_memory_listener, &address_space_memory);
>>      memory_listener_register(&io_memory_listener, &address_space_io);
>>
>
Paolo Bonzini - Nov. 12, 2012, 8:48 a.m.
Il 12/11/2012 07:22, liu ping fan ha scritto:
> On Sat, Nov 10, 2012 at 9:54 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Il 09/11/2012 04:14, Liu Ping Fan ha scritto:
>>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>>
>>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>> ---
>>>  cpu-all.h |    1 +
>>>  exec.c    |   46 +++++++++++++++++++++++++++++++++++++++-------
>>>  2 files changed, 40 insertions(+), 7 deletions(-)
>>
>> The problem here is that the ram_list is a pretty critical bit for TCG.
>>
> This patch does not touch the MRU, so you mean the expense of lock?

Yes.

One alternative is to remove the MRU, but add a 1-item cache to speed up
the common case.  Then the case where you use the cache can be placed
(later) in an RCU critical section.

>> The migration thread series has patches that split the list in two: a
>> MRU-accessed list that uses the BQL, and another that uses a separate lock.
>
> I read the thread, but I think we can not protect RAMBlock w/o a
> unified lock.  When ram device's refcnt->0, and call
> qemu_ram_free_from_ptr(), it can be with/without QBL.

Note that you would also split between unmap (which does QLIST_REMOVE)
and free (which actually frees the block).  qemu_ram_free_from_ptr()
would really become qemu_ram_unmap_from_ptr(), and could do part of the
work asynchronously---which makes it free to take and release locks as
needed.  I don't think it is problematic to delay the freeing of the
blocks_mru item which requires BQL.

Paolo
pingfan liu - Nov. 13, 2012, 6:07 a.m.
On Mon, Nov 12, 2012 at 4:48 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 12/11/2012 07:22, liu ping fan ha scritto:
>> On Sat, Nov 10, 2012 at 9:54 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>> Il 09/11/2012 04:14, Liu Ping Fan ha scritto:
>>>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>>>
>>>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>>> ---
>>>>  cpu-all.h |    1 +
>>>>  exec.c    |   46 +++++++++++++++++++++++++++++++++++++++-------
>>>>  2 files changed, 40 insertions(+), 7 deletions(-)
>>>
>>> The problem here is that the ram_list is a pretty critical bit for TCG.
>>>
>> This patch does not touch the MRU, so you mean the expense of lock?
>
> Yes.
>
> One alternative is to remove the MRU, but add a 1-item cache to speed up
> the common case.  Then the case where you use the cache can be placed
> (later) in an RCU critical section.
>
>>> The migration thread series has patches that split the list in two: a
>>> MRU-accessed list that uses the BQL, and another that uses a separate lock.
>>
>> I read the thread, but I think we can not protect RAMBlock w/o a
>> unified lock.  When ram device's refcnt->0, and call
>> qemu_ram_free_from_ptr(), it can be with/without QBL.
>
> Note that you would also split between unmap (which does QLIST_REMOVE)
> and free (which actually frees the block).  qemu_ram_free_from_ptr()
> would really become qemu_ram_unmap_from_ptr(), and could do part of the
> work asynchronously---which makes it free to take and release locks as
> needed.  I don't think it is problematic to delay the freeing of the
> blocks_mru item which requires BQL.
>
Right.  Then just one thing left, the big lock may be called
recursively.  Do we have some elegant method to handle  it?

Regards,
Pingfan
> Paolo

Patch

diff --git a/cpu-all.h b/cpu-all.h
index 6aa7e58..d3ead99 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -498,6 +498,7 @@  typedef struct RAMBlock {
 } RAMBlock;
 
 typedef struct RAMList {
+    QemuMutex lock;
     uint8_t *phys_dirty;
     QLIST_HEAD(, RAMBlock) blocks;
 } RAMList;
diff --git a/exec.c b/exec.c
index fe84718..e5f1c0f 100644
--- a/exec.c
+++ b/exec.c
@@ -2444,6 +2444,7 @@  static ram_addr_t find_ram_offset(ram_addr_t size)
     if (QLIST_EMPTY(&ram_list.blocks))
         return 0;
 
+    qemu_mutex_lock(&ram_list.lock);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         ram_addr_t end, next = RAM_ADDR_MAX;
 
@@ -2459,6 +2460,7 @@  static ram_addr_t find_ram_offset(ram_addr_t size)
             mingap = next - end;
         }
     }
+    qemu_mutex_unlock(&ram_list.lock);
 
     if (offset == RAM_ADDR_MAX) {
         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
@@ -2474,8 +2476,10 @@  ram_addr_t last_ram_offset(void)
     RAMBlock *block;
     ram_addr_t last = 0;
 
+    qemu_mutex_lock(&ram_list.lock);
     QLIST_FOREACH(block, &ram_list.blocks, next)
         last = MAX(last, block->offset + block->length);
+    qemu_mutex_unlock(&ram_list.lock);
 
     return last;
 }
@@ -2503,6 +2507,7 @@  void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
     RAMBlock *new_block, *block;
 
     new_block = NULL;
+    qemu_mutex_lock(&ram_list.lock);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (block->offset == addr) {
             new_block = block;
@@ -2528,6 +2533,7 @@  void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
             abort();
         }
     }
+    qemu_mutex_unlock(&ram_list.lock);
 }
 
 static int memory_try_enable_merging(void *addr, size_t len)
@@ -2582,12 +2588,6 @@  ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
     }
     new_block->length = size;
 
-    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
-
-    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
-                                       last_ram_offset() >> TARGET_PAGE_BITS);
-    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
-           0, size >> TARGET_PAGE_BITS);
     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
 
     qemu_ram_setup_dump(new_block->host, size);
@@ -2596,6 +2596,15 @@  ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
     if (kvm_enabled())
         kvm_setup_guest_memory(new_block->host, size);
 
+    qemu_mutex_lock(&ram_list.lock);
+    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+
+    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
+                                       last_ram_offset() >> TARGET_PAGE_BITS);
+    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
+           0, size >> TARGET_PAGE_BITS);
+    qemu_mutex_unlock(&ram_list.lock);
+
     return new_block->offset;
 }
 
@@ -2608,19 +2617,23 @@  void qemu_ram_free_from_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    qemu_mutex_lock(&ram_list.lock);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QLIST_REMOVE(block, next);
             g_free(block);
+            qemu_mutex_unlock(&ram_list.lock);
             return;
         }
     }
+    qemu_mutex_unlock(&ram_list.lock);
 }
 
 void qemu_ram_free(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    qemu_mutex_lock(&ram_list.lock);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QLIST_REMOVE(block, next);
@@ -2649,10 +2662,11 @@  void qemu_ram_free(ram_addr_t addr)
 #endif
             }
             g_free(block);
+            qemu_mutex_unlock(&ram_list.lock);
             return;
         }
     }
-
+    qemu_mutex_unlock(&ram_list.lock);
 }
 
 #ifndef _WIN32
@@ -2663,6 +2677,7 @@  void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
     int flags;
     void *area, *vaddr;
 
+    qemu_mutex_lock(&ram_list.lock);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         offset = addr - block->offset;
         if (offset < block->length) {
@@ -2711,9 +2726,11 @@  void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
                 memory_try_enable_merging(vaddr, length);
                 qemu_ram_setup_dump(vaddr, length);
             }
+            qemu_mutex_unlock(&ram_list.lock);
             return;
         }
     }
+    qemu_mutex_unlock(&ram_list.lock);
 }
 #endif /* !_WIN32 */
 
@@ -2729,6 +2746,7 @@  void *qemu_get_ram_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    qemu_mutex_lock(&ram_list.lock);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr - block->offset < block->length) {
             /* Move this entry to to start of the list.  */
@@ -2742,15 +2760,18 @@  void *qemu_get_ram_ptr(ram_addr_t addr)
                  * In that case just map until the end of the page.
                  */
                 if (block->offset == 0) {
+                    qemu_mutex_unlock(&ram_list.lock);
                     return xen_map_cache(addr, 0, 0);
                 } else if (block->host == NULL) {
                     block->host =
                         xen_map_cache(block->offset, block->length, 1);
                 }
             }
+            qemu_mutex_unlock(&ram_list.lock);
             return block->host + (addr - block->offset);
         }
     }
+    qemu_mutex_unlock(&ram_list.lock);
 
     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
     abort();
@@ -2765,6 +2786,7 @@  void *qemu_safe_ram_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    qemu_mutex_lock(&ram_list.lock);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr - block->offset < block->length) {
             if (xen_enabled()) {
@@ -2773,15 +2795,18 @@  void *qemu_safe_ram_ptr(ram_addr_t addr)
                  * In that case just map until the end of the page.
                  */
                 if (block->offset == 0) {
+                    qemu_mutex_unlock(&ram_list.lock);
                     return xen_map_cache(addr, 0, 0);
                 } else if (block->host == NULL) {
                     block->host =
                         xen_map_cache(block->offset, block->length, 1);
                 }
             }
+            qemu_mutex_unlock(&ram_list.lock);
             return block->host + (addr - block->offset);
         }
     }
+    qemu_mutex_unlock(&ram_list.lock);
 
     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
     abort();
@@ -2801,13 +2826,16 @@  void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
     } else {
         RAMBlock *block;
 
+        qemu_mutex_lock(&ram_list.lock);
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             if (addr - block->offset < block->length) {
                 if (addr - block->offset + *size > block->length)
                     *size = block->length - addr + block->offset;
+                qemu_mutex_unlock(&ram_list.lock);
                 return block->host + (addr - block->offset);
             }
         }
+        qemu_mutex_unlock(&ram_list.lock);
 
         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
         abort();
@@ -2829,6 +2857,7 @@  int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         return 0;
     }
 
+    qemu_mutex_lock(&ram_list.lock);
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         /* This case append when the block is not mapped. */
         if (block->host == NULL) {
@@ -2836,9 +2865,11 @@  int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         }
         if (host - block->host < block->length) {
             *ram_addr = block->offset + (host - block->host);
+            qemu_mutex_unlock(&ram_list.lock);
             return 0;
         }
     }
+    qemu_mutex_unlock(&ram_list.lock);
 
     return -1;
 }
@@ -3318,6 +3349,7 @@  static void memory_map_init(void)
     address_space_io.name = "I/O";
 
     qemu_mutex_init(&bounce.lock);
+    qemu_mutex_init(&ram_list.lock);
 
     memory_listener_register(&core_memory_listener, &address_space_memory);
     memory_listener_register(&io_memory_listener, &address_space_io);