Patchwork [04/15] memory: MemoryRegion topology must be stable when updating

login
register
mail settings
Submitter pingfan liu
Date Aug. 8, 2012, 6:25 a.m.
Message ID <1344407156-25562-5-git-send-email-qemulist@gmail.com>
Download mbox | patch
Permalink /patch/175872/
State New
Headers show

Comments

pingfan liu - Aug. 8, 2012, 6:25 a.m.
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

Using mem_map_lock to protect among updaters. So we can get the intact
snapshot of mem topology -- FlatView & radix-tree.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 exec.c   |    3 +++
 memory.c |   22 ++++++++++++++++++++++
 memory.h |    2 ++
 3 files changed, 27 insertions(+), 0 deletions(-)
Avi Kivity - Aug. 8, 2012, 9:13 a.m.
On 08/08/2012 09:25 AM, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> Using mem_map_lock to protect among updaters. So we can get the intact
> snapshot of mem topology -- FlatView & radix-tree.
> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  exec.c   |    3 +++
>  memory.c |   22 ++++++++++++++++++++++
>  memory.h |    2 ++
>  3 files changed, 27 insertions(+), 0 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 8244d54..0e29ef9 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -210,6 +210,8 @@ static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
>     The bottom level has pointers to MemoryRegionSections.  */
>  static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
>  
> +QemuMutex mem_map_lock;
> +
>  static void io_mem_init(void);
>  static void memory_map_init(void);
>  
> @@ -637,6 +639,7 @@ void cpu_exec_init_all(void)
>  #if !defined(CONFIG_USER_ONLY)
>      memory_map_init();
>      io_mem_init();
> +    qemu_mutex_init(&mem_map_lock);
>  #endif
>  }
>  
> diff --git a/memory.c b/memory.c
> index aab4a31..5986532 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -761,7 +761,9 @@ void memory_region_transaction_commit(void)
>      assert(memory_region_transaction_depth);
>      --memory_region_transaction_depth;
>      if (!memory_region_transaction_depth && memory_region_update_pending) {
> +        qemu_mutex_lock(&mem_map_lock);
>          memory_region_update_topology(NULL);
> +        qemu_mutex_unlock(&mem_map_lock);
>      }
>  }

Seems to me that nothing in memory.c can be susceptible to races.  It must
already be called under the big qemu lock, and with the exception of
mutators (memory_region_set_*), changes aren't directly visible.

I think it's sufficient to take the mem_map_lock at the beginning of
core_begin() and drop it at the end of core_commit().  That means all
updates of volatile state, phys_map, are protected.
Blue Swirl - Aug. 8, 2012, 7:17 p.m.
On Wed, Aug 8, 2012 at 6:25 AM, Liu Ping Fan <qemulist@gmail.com> wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>
> Using mem_map_lock to protect among updaters. So we can get the intact
> snapshot of mem topology -- FlatView & radix-tree.
>
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  exec.c   |    3 +++
>  memory.c |   22 ++++++++++++++++++++++
>  memory.h |    2 ++
>  3 files changed, 27 insertions(+), 0 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 8244d54..0e29ef9 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -210,6 +210,8 @@ static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
>     The bottom level has pointers to MemoryRegionSections.  */
>  static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
>
> +QemuMutex mem_map_lock;
> +
>  static void io_mem_init(void);
>  static void memory_map_init(void);
>
> @@ -637,6 +639,7 @@ void cpu_exec_init_all(void)
>  #if !defined(CONFIG_USER_ONLY)
>      memory_map_init();
>      io_mem_init();
> +    qemu_mutex_init(&mem_map_lock);

I'd move this and the mutex to memory.c since there are no other uses.
The mutex could be static then.

>  #endif
>  }
>
> diff --git a/memory.c b/memory.c
> index aab4a31..5986532 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -761,7 +761,9 @@ void memory_region_transaction_commit(void)
>      assert(memory_region_transaction_depth);
>      --memory_region_transaction_depth;
>      if (!memory_region_transaction_depth && memory_region_update_pending) {
> +        qemu_mutex_lock(&mem_map_lock);
>          memory_region_update_topology(NULL);
> +        qemu_mutex_unlock(&mem_map_lock);
>      }
>  }
>
> @@ -1069,8 +1071,10 @@ void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
>  {
>      uint8_t mask = 1 << client;
>
> +    qemu_mutex_lock(&mem_map_lock);
>      mr->dirty_log_mask = (mr->dirty_log_mask & ~mask) | (log * mask);
>      memory_region_update_topology(mr);
> +    qemu_mutex_unlock(&mem_map_lock);
>  }
>
>  bool memory_region_get_dirty(MemoryRegion *mr, target_phys_addr_t addr,
> @@ -1103,8 +1107,10 @@ void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
>  void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
>  {
>      if (mr->readonly != readonly) {
> +        qemu_mutex_lock(&mem_map_lock);
>          mr->readonly = readonly;
>          memory_region_update_topology(mr);
> +        qemu_mutex_unlock(&mem_map_lock);
>      }
>  }
>
> @@ -1112,7 +1118,9 @@ void memory_region_rom_device_set_readable(MemoryRegion *mr, bool readable)
>  {
>      if (mr->readable != readable) {
>          mr->readable = readable;
> +        qemu_mutex_lock(&mem_map_lock);
>          memory_region_update_topology(mr);
> +        qemu_mutex_unlock(&mem_map_lock);
>      }
>  }
>
> @@ -1206,6 +1214,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>      };
>      unsigned i;
>
> +    qemu_mutex_lock(&mem_map_lock);
>      for (i = 0; i < mr->ioeventfd_nb; ++i) {
>          if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
>              break;
> @@ -1218,6 +1227,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>              sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i));
>      mr->ioeventfds[i] = mrfd;
>      memory_region_update_topology(mr);
> +    qemu_mutex_unlock(&mem_map_lock);
>  }
>
>  void memory_region_del_eventfd(MemoryRegion *mr,
> @@ -1236,6 +1246,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
>      };
>      unsigned i;
>
> +    qemu_mutex_lock(&mem_map_lock);
>      for (i = 0; i < mr->ioeventfd_nb; ++i) {
>          if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
>              break;
> @@ -1248,6 +1259,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
>      mr->ioeventfds = g_realloc(mr->ioeventfds,
>                                    sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1);
>      memory_region_update_topology(mr);
> +    qemu_mutex_unlock(&mem_map_lock);
>  }
>
>  static void memory_region_add_subregion_common(MemoryRegion *mr,
> @@ -1259,6 +1271,8 @@ static void memory_region_add_subregion_common(MemoryRegion *mr,
>      assert(!subregion->parent);
>      subregion->parent = mr;
>      subregion->addr = offset;
> +
> +    qemu_mutex_lock(&mem_map_lock);
>      QTAILQ_FOREACH(other, &mr->subregions, subregions_link) {
>          if (subregion->may_overlap || other->may_overlap) {
>              continue;
> @@ -1289,6 +1303,7 @@ static void memory_region_add_subregion_common(MemoryRegion *mr,
>      QTAILQ_INSERT_TAIL(&mr->subregions, subregion, subregions_link);
>  done:
>      memory_region_update_topology(mr);
> +    qemu_mutex_unlock(&mem_map_lock);
>  }
>
>
> @@ -1316,8 +1331,11 @@ void memory_region_del_subregion(MemoryRegion *mr,
>  {
>      assert(subregion->parent == mr);
>      subregion->parent = NULL;
> +
> +    qemu_mutex_lock(&mem_map_lock);
>      QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link);
>      memory_region_update_topology(mr);
> +    qemu_mutex_unlock(&mem_map_lock);
>  }
>
>  void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
> @@ -1325,8 +1343,10 @@ void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
>      if (enabled == mr->enabled) {
>          return;
>      }
> +    qemu_mutex_lock(&mem_map_lock);
>      mr->enabled = enabled;
>      memory_region_update_topology(NULL);
> +    qemu_mutex_unlock(&mem_map_lock);
>  }
>
>  void memory_region_set_address(MemoryRegion *mr, target_phys_addr_t addr)
> @@ -1361,7 +1381,9 @@ void memory_region_set_alias_offset(MemoryRegion *mr, target_phys_addr_t offset)
>          return;
>      }
>
> +    qemu_mutex_lock(&mem_map_lock);
>      memory_region_update_topology(mr);
> +    qemu_mutex_unlock(&mem_map_lock);
>  }
>
>  ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr)
> diff --git a/memory.h b/memory.h
> index 740c48e..fe6aefa 100644
> --- a/memory.h
> +++ b/memory.h
> @@ -25,6 +25,7 @@
>  #include "iorange.h"
>  #include "ioport.h"
>  #include "int128.h"
> +#include "qemu-thread.h"
>
>  typedef struct MemoryRegionOps MemoryRegionOps;
>  typedef struct MemoryRegion MemoryRegion;
> @@ -207,6 +208,7 @@ struct MemoryListener {
>      QTAILQ_ENTRY(MemoryListener) link;
>  };
>
> +extern QemuMutex mem_map_lock;
>  /**
>   * memory_region_init: Initialize a memory region
>   *
> --
> 1.7.4.4
>
pingfan liu - Aug. 9, 2012, 7:28 a.m.
On Thu, Aug 9, 2012 at 3:17 AM, Blue Swirl <blauwirbel@gmail.com> wrote:
> On Wed, Aug 8, 2012 at 6:25 AM, Liu Ping Fan <qemulist@gmail.com> wrote:
>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>
>> Using mem_map_lock to protect among updaters. So we can get the intact
>> snapshot of mem topology -- FlatView & radix-tree.
>>
>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>> ---
>>  exec.c   |    3 +++
>>  memory.c |   22 ++++++++++++++++++++++
>>  memory.h |    2 ++
>>  3 files changed, 27 insertions(+), 0 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index 8244d54..0e29ef9 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -210,6 +210,8 @@ static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
>>     The bottom level has pointers to MemoryRegionSections.  */
>>  static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
>>
>> +QemuMutex mem_map_lock;
>> +
>>  static void io_mem_init(void);
>>  static void memory_map_init(void);
>>
>> @@ -637,6 +639,7 @@ void cpu_exec_init_all(void)
>>  #if !defined(CONFIG_USER_ONLY)
>>      memory_map_init();
>>      io_mem_init();
>> +    qemu_mutex_init(&mem_map_lock);
>
> I'd move this and the mutex to memory.c since there are no other uses.
> The mutex could be static then.
>
But the init entry is in exec.c, not memory.c.

Regards,
pingfan

>>  #endif
>>  }
>>
>> diff --git a/memory.c b/memory.c
>> index aab4a31..5986532 100644
>> --- a/memory.c
>> +++ b/memory.c
>> @@ -761,7 +761,9 @@ void memory_region_transaction_commit(void)
>>      assert(memory_region_transaction_depth);
>>      --memory_region_transaction_depth;
>>      if (!memory_region_transaction_depth && memory_region_update_pending) {
>> +        qemu_mutex_lock(&mem_map_lock);
>>          memory_region_update_topology(NULL);
>> +        qemu_mutex_unlock(&mem_map_lock);
>>      }
>>  }
>>
>> @@ -1069,8 +1071,10 @@ void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
>>  {
>>      uint8_t mask = 1 << client;
>>
>> +    qemu_mutex_lock(&mem_map_lock);
>>      mr->dirty_log_mask = (mr->dirty_log_mask & ~mask) | (log * mask);
>>      memory_region_update_topology(mr);
>> +    qemu_mutex_unlock(&mem_map_lock);
>>  }
>>
>>  bool memory_region_get_dirty(MemoryRegion *mr, target_phys_addr_t addr,
>> @@ -1103,8 +1107,10 @@ void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
>>  void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
>>  {
>>      if (mr->readonly != readonly) {
>> +        qemu_mutex_lock(&mem_map_lock);
>>          mr->readonly = readonly;
>>          memory_region_update_topology(mr);
>> +        qemu_mutex_unlock(&mem_map_lock);
>>      }
>>  }
>>
>> @@ -1112,7 +1118,9 @@ void memory_region_rom_device_set_readable(MemoryRegion *mr, bool readable)
>>  {
>>      if (mr->readable != readable) {
>>          mr->readable = readable;
>> +        qemu_mutex_lock(&mem_map_lock);
>>          memory_region_update_topology(mr);
>> +        qemu_mutex_unlock(&mem_map_lock);
>>      }
>>  }
>>
>> @@ -1206,6 +1214,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>>      };
>>      unsigned i;
>>
>> +    qemu_mutex_lock(&mem_map_lock);
>>      for (i = 0; i < mr->ioeventfd_nb; ++i) {
>>          if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
>>              break;
>> @@ -1218,6 +1227,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>>              sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i));
>>      mr->ioeventfds[i] = mrfd;
>>      memory_region_update_topology(mr);
>> +    qemu_mutex_unlock(&mem_map_lock);
>>  }
>>
>>  void memory_region_del_eventfd(MemoryRegion *mr,
>> @@ -1236,6 +1246,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
>>      };
>>      unsigned i;
>>
>> +    qemu_mutex_lock(&mem_map_lock);
>>      for (i = 0; i < mr->ioeventfd_nb; ++i) {
>>          if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
>>              break;
>> @@ -1248,6 +1259,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
>>      mr->ioeventfds = g_realloc(mr->ioeventfds,
>>                                    sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1);
>>      memory_region_update_topology(mr);
>> +    qemu_mutex_unlock(&mem_map_lock);
>>  }
>>
>>  static void memory_region_add_subregion_common(MemoryRegion *mr,
>> @@ -1259,6 +1271,8 @@ static void memory_region_add_subregion_common(MemoryRegion *mr,
>>      assert(!subregion->parent);
>>      subregion->parent = mr;
>>      subregion->addr = offset;
>> +
>> +    qemu_mutex_lock(&mem_map_lock);
>>      QTAILQ_FOREACH(other, &mr->subregions, subregions_link) {
>>          if (subregion->may_overlap || other->may_overlap) {
>>              continue;
>> @@ -1289,6 +1303,7 @@ static void memory_region_add_subregion_common(MemoryRegion *mr,
>>      QTAILQ_INSERT_TAIL(&mr->subregions, subregion, subregions_link);
>>  done:
>>      memory_region_update_topology(mr);
>> +    qemu_mutex_unlock(&mem_map_lock);
>>  }
>>
>>
>> @@ -1316,8 +1331,11 @@ void memory_region_del_subregion(MemoryRegion *mr,
>>  {
>>      assert(subregion->parent == mr);
>>      subregion->parent = NULL;
>> +
>> +    qemu_mutex_lock(&mem_map_lock);
>>      QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link);
>>      memory_region_update_topology(mr);
>> +    qemu_mutex_unlock(&mem_map_lock);
>>  }
>>
>>  void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
>> @@ -1325,8 +1343,10 @@ void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
>>      if (enabled == mr->enabled) {
>>          return;
>>      }
>> +    qemu_mutex_lock(&mem_map_lock);
>>      mr->enabled = enabled;
>>      memory_region_update_topology(NULL);
>> +    qemu_mutex_unlock(&mem_map_lock);
>>  }
>>
>>  void memory_region_set_address(MemoryRegion *mr, target_phys_addr_t addr)
>> @@ -1361,7 +1381,9 @@ void memory_region_set_alias_offset(MemoryRegion *mr, target_phys_addr_t offset)
>>          return;
>>      }
>>
>> +    qemu_mutex_lock(&mem_map_lock);
>>      memory_region_update_topology(mr);
>> +    qemu_mutex_unlock(&mem_map_lock);
>>  }
>>
>>  ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr)
>> diff --git a/memory.h b/memory.h
>> index 740c48e..fe6aefa 100644
>> --- a/memory.h
>> +++ b/memory.h
>> @@ -25,6 +25,7 @@
>>  #include "iorange.h"
>>  #include "ioport.h"
>>  #include "int128.h"
>> +#include "qemu-thread.h"
>>
>>  typedef struct MemoryRegionOps MemoryRegionOps;
>>  typedef struct MemoryRegion MemoryRegion;
>> @@ -207,6 +208,7 @@ struct MemoryListener {
>>      QTAILQ_ENTRY(MemoryListener) link;
>>  };
>>
>> +extern QemuMutex mem_map_lock;
>>  /**
>>   * memory_region_init: Initialize a memory region
>>   *
>> --
>> 1.7.4.4
>>
pingfan liu - Aug. 9, 2012, 7:28 a.m.
On Wed, Aug 8, 2012 at 5:13 PM, Avi Kivity <avi@redhat.com> wrote:
> On 08/08/2012 09:25 AM, Liu Ping Fan wrote:
>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>
>> Using mem_map_lock to protect among updaters. So we can get the intact
>> snapshot of mem topology -- FlatView & radix-tree.
>>
>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>> ---
>>  exec.c   |    3 +++
>>  memory.c |   22 ++++++++++++++++++++++
>>  memory.h |    2 ++
>>  3 files changed, 27 insertions(+), 0 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index 8244d54..0e29ef9 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -210,6 +210,8 @@ static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
>>     The bottom level has pointers to MemoryRegionSections.  */
>>  static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
>>
>> +QemuMutex mem_map_lock;
>> +
>>  static void io_mem_init(void);
>>  static void memory_map_init(void);
>>
>> @@ -637,6 +639,7 @@ void cpu_exec_init_all(void)
>>  #if !defined(CONFIG_USER_ONLY)
>>      memory_map_init();
>>      io_mem_init();
>> +    qemu_mutex_init(&mem_map_lock);
>>  #endif
>>  }
>>
>> diff --git a/memory.c b/memory.c
>> index aab4a31..5986532 100644
>> --- a/memory.c
>> +++ b/memory.c
>> @@ -761,7 +761,9 @@ void memory_region_transaction_commit(void)
>>      assert(memory_region_transaction_depth);
>>      --memory_region_transaction_depth;
>>      if (!memory_region_transaction_depth && memory_region_update_pending) {
>> +        qemu_mutex_lock(&mem_map_lock);
>>          memory_region_update_topology(NULL);
>> +        qemu_mutex_unlock(&mem_map_lock);
>>      }
>>  }
>
> Seems to me that nothing in memory.c can susceptible to races.  It must
> already be called under the big qemu lock, and with the exception of
> mutators (memory_region_set_*), changes aren't directly visible.
>
Yes, what I want to do is "prepare unplug out of protection of global
lock".  When io-dispatch and mmio-dispatch are all out of big lock, we
will run into the following scene:
    In vcpu context A, qdev_unplug_complete()-> delete subregion;
    In context B, write pci bar --> pci mapping update    -> add subregion

> I think it's sufficient to take the mem_map_lock at the beginning of
> core_begin() and drop it at the end of core_commit().  That means all
> updates of volatile state, phys_map, are protected.
>
The mem_map_lock is to protect both address_space_io and
address_space_memory. Without the protection of the big lock,
contention will arise between the updaters
(memory_region_{add,del}_subregion) and the readers
(generate_memory_topology()->render_memory_region()).

If the lock is taken only in core_begin/commit, we will have to
duplicate it for each xx_begin/commit, right?  And at the same time,
mr->subregions is exposed under SMP without the big lock.

Thanks and regards,
pingfan

>
>
> --
> error compiling committee.c: too many arguments to function
Avi Kivity - Aug. 9, 2012, 8:24 a.m.
On 08/09/2012 10:28 AM, liu ping fan wrote:
>>
>> Seems to me that nothing in memory.c can susceptible to races.  It must
>> already be called under the big qemu lock, and with the exception of
>> mutators (memory_region_set_*), changes aren't directly visible.
>>
> Yes, what I want to do is "prepare unplug out of protection of global
> lock".  When io-dispatch and mmio-dispatch are all out of big lock, we
> will run into the following scene:
>     In vcpu context A, qdev_unplug_complete()-> delete subregion;
>     In context B, write pci bar --> pci mapping update    -> add subregion

Why do you want unlocked unplug?  Unplug is rare and complicated; there
are no performance considerations on one hand, and difficulty of testing
for lock correctness on the other.  I think it is better if it remains
protected by the global lock.

> 
>> I think it's sufficient to take the mem_map_lock at the beginning of
>> core_begin() and drop it at the end of core_commit().  That means all
>> updates of volatile state, phys_map, are protected.
>>
> The mem_map_lock is to protect both address_space_io and
> address_space_memory. When without the protection of big lock,
> competing will raise among the updaters
> (memory_region_{add,del}_subregion and the readers
> generate_memory_topology()->render_memory_region().

These should all run under the big qemu lock, for the same reasons.
They are rare and not performance sensitive.  Only phys_map reads are
performance sensitive.

> 
> If just in core_begin/commit, we will duplicate it for
> xx_begin/commit, right?  

No.  Other listeners will be protected by the global lock.

> And at the same time, mr->subregions is
> exposed under SMP without big lock.
> 

Who accesses it?

IMO locking should look like:

  phys_map: mem_map_lock
  dispatch callbacks: device specific lock (or big qemu lock for
unconverted devices)
  everything else: big qemu lock
Blue Swirl - Aug. 9, 2012, 5:09 p.m.
On Thu, Aug 9, 2012 at 7:28 AM, liu ping fan <qemulist@gmail.com> wrote:
> On Thu, Aug 9, 2012 at 3:17 AM, Blue Swirl <blauwirbel@gmail.com> wrote:
>> On Wed, Aug 8, 2012 at 6:25 AM, Liu Ping Fan <qemulist@gmail.com> wrote:
>>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>>
>>> Using mem_map_lock to protect among updaters. So we can get the intact
>>> snapshot of mem topology -- FlatView & radix-tree.
>>>
>>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>> ---
>>>  exec.c   |    3 +++
>>>  memory.c |   22 ++++++++++++++++++++++
>>>  memory.h |    2 ++
>>>  3 files changed, 27 insertions(+), 0 deletions(-)
>>>
>>> diff --git a/exec.c b/exec.c
>>> index 8244d54..0e29ef9 100644
>>> --- a/exec.c
>>> +++ b/exec.c
>>> @@ -210,6 +210,8 @@ static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
>>>     The bottom level has pointers to MemoryRegionSections.  */
>>>  static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
>>>
>>> +QemuMutex mem_map_lock;
>>> +
>>>  static void io_mem_init(void);
>>>  static void memory_map_init(void);
>>>
>>> @@ -637,6 +639,7 @@ void cpu_exec_init_all(void)
>>>  #if !defined(CONFIG_USER_ONLY)
>>>      memory_map_init();
>>>      io_mem_init();
>>> +    qemu_mutex_init(&mem_map_lock);
>>
>> I'd move this and the mutex to memory.c since there are no other uses.
>> The mutex could be static then.
>>
> But the init entry is in exec.c, not memory.c.

Memory subsystem does not have an init function of its own, this can
be the start of it.

>
> Regards,
> pingfan
>
>>>  #endif
>>>  }
>>>
>>> diff --git a/memory.c b/memory.c
>>> index aab4a31..5986532 100644
>>> --- a/memory.c
>>> +++ b/memory.c
>>> @@ -761,7 +761,9 @@ void memory_region_transaction_commit(void)
>>>      assert(memory_region_transaction_depth);
>>>      --memory_region_transaction_depth;
>>>      if (!memory_region_transaction_depth && memory_region_update_pending) {
>>> +        qemu_mutex_lock(&mem_map_lock);
>>>          memory_region_update_topology(NULL);
>>> +        qemu_mutex_unlock(&mem_map_lock);
>>>      }
>>>  }
>>>
>>> @@ -1069,8 +1071,10 @@ void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
>>>  {
>>>      uint8_t mask = 1 << client;
>>>
>>> +    qemu_mutex_lock(&mem_map_lock);
>>>      mr->dirty_log_mask = (mr->dirty_log_mask & ~mask) | (log * mask);
>>>      memory_region_update_topology(mr);
>>> +    qemu_mutex_unlock(&mem_map_lock);
>>>  }
>>>
>>>  bool memory_region_get_dirty(MemoryRegion *mr, target_phys_addr_t addr,
>>> @@ -1103,8 +1107,10 @@ void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
>>>  void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
>>>  {
>>>      if (mr->readonly != readonly) {
>>> +        qemu_mutex_lock(&mem_map_lock);
>>>          mr->readonly = readonly;
>>>          memory_region_update_topology(mr);
>>> +        qemu_mutex_unlock(&mem_map_lock);
>>>      }
>>>  }
>>>
>>> @@ -1112,7 +1118,9 @@ void memory_region_rom_device_set_readable(MemoryRegion *mr, bool readable)
>>>  {
>>>      if (mr->readable != readable) {
>>>          mr->readable = readable;
>>> +        qemu_mutex_lock(&mem_map_lock);
>>>          memory_region_update_topology(mr);
>>> +        qemu_mutex_unlock(&mem_map_lock);
>>>      }
>>>  }
>>>
>>> @@ -1206,6 +1214,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>>>      };
>>>      unsigned i;
>>>
>>> +    qemu_mutex_lock(&mem_map_lock);
>>>      for (i = 0; i < mr->ioeventfd_nb; ++i) {
>>>          if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
>>>              break;
>>> @@ -1218,6 +1227,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>>>              sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i));
>>>      mr->ioeventfds[i] = mrfd;
>>>      memory_region_update_topology(mr);
>>> +    qemu_mutex_unlock(&mem_map_lock);
>>>  }
>>>
>>>  void memory_region_del_eventfd(MemoryRegion *mr,
>>> @@ -1236,6 +1246,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
>>>      };
>>>      unsigned i;
>>>
>>> +    qemu_mutex_lock(&mem_map_lock);
>>>      for (i = 0; i < mr->ioeventfd_nb; ++i) {
>>>          if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
>>>              break;
>>> @@ -1248,6 +1259,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
>>>      mr->ioeventfds = g_realloc(mr->ioeventfds,
>>>                                    sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1);
>>>      memory_region_update_topology(mr);
>>> +    qemu_mutex_unlock(&mem_map_lock);
>>>  }
>>>
>>>  static void memory_region_add_subregion_common(MemoryRegion *mr,
>>> @@ -1259,6 +1271,8 @@ static void memory_region_add_subregion_common(MemoryRegion *mr,
>>>      assert(!subregion->parent);
>>>      subregion->parent = mr;
>>>      subregion->addr = offset;
>>> +
>>> +    qemu_mutex_lock(&mem_map_lock);
>>>      QTAILQ_FOREACH(other, &mr->subregions, subregions_link) {
>>>          if (subregion->may_overlap || other->may_overlap) {
>>>              continue;
>>> @@ -1289,6 +1303,7 @@ static void memory_region_add_subregion_common(MemoryRegion *mr,
>>>      QTAILQ_INSERT_TAIL(&mr->subregions, subregion, subregions_link);
>>>  done:
>>>      memory_region_update_topology(mr);
>>> +    qemu_mutex_unlock(&mem_map_lock);
>>>  }
>>>
>>>
>>> @@ -1316,8 +1331,11 @@ void memory_region_del_subregion(MemoryRegion *mr,
>>>  {
>>>      assert(subregion->parent == mr);
>>>      subregion->parent = NULL;
>>> +
>>> +    qemu_mutex_lock(&mem_map_lock);
>>>      QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link);
>>>      memory_region_update_topology(mr);
>>> +    qemu_mutex_unlock(&mem_map_lock);
>>>  }
>>>
>>>  void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
>>> @@ -1325,8 +1343,10 @@ void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
>>>      if (enabled == mr->enabled) {
>>>          return;
>>>      }
>>> +    qemu_mutex_lock(&mem_map_lock);
>>>      mr->enabled = enabled;
>>>      memory_region_update_topology(NULL);
>>> +    qemu_mutex_unlock(&mem_map_lock);
>>>  }
>>>
>>>  void memory_region_set_address(MemoryRegion *mr, target_phys_addr_t addr)
>>> @@ -1361,7 +1381,9 @@ void memory_region_set_alias_offset(MemoryRegion *mr, target_phys_addr_t offset)
>>>          return;
>>>      }
>>>
>>> +    qemu_mutex_lock(&mem_map_lock);
>>>      memory_region_update_topology(mr);
>>> +    qemu_mutex_unlock(&mem_map_lock);
>>>  }
>>>
>>>  ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr)
>>> diff --git a/memory.h b/memory.h
>>> index 740c48e..fe6aefa 100644
>>> --- a/memory.h
>>> +++ b/memory.h
>>> @@ -25,6 +25,7 @@
>>>  #include "iorange.h"
>>>  #include "ioport.h"
>>>  #include "int128.h"
>>> +#include "qemu-thread.h"
>>>
>>>  typedef struct MemoryRegionOps MemoryRegionOps;
>>>  typedef struct MemoryRegion MemoryRegion;
>>> @@ -207,6 +208,7 @@ struct MemoryListener {
>>>      QTAILQ_ENTRY(MemoryListener) link;
>>>  };
>>>
>>> +extern QemuMutex mem_map_lock;
>>>  /**
>>>   * memory_region_init: Initialize a memory region
>>>   *
>>> --
>>> 1.7.4.4
>>>
pingfan liu - Aug. 10, 2012, 6:44 a.m.
On Thu, Aug 9, 2012 at 4:24 PM, Avi Kivity <avi@redhat.com> wrote:
> On 08/09/2012 10:28 AM, liu ping fan wrote:
>>>
>>> Seems to me that nothing in memory.c can susceptible to races.  It must
>>> already be called under the big qemu lock, and with the exception of
>>> mutators (memory_region_set_*), changes aren't directly visible.
>>>
>> Yes, what I want to do is "prepare unplug out of protection of global
>> lock".  When io-dispatch and mmio-dispatch are all out of big lock, we
>> will run into the following scene:
>>     In vcpu context A, qdev_unplug_complete()-> delete subregion;
>>     In context B, write pci bar --> pci mapping update    -> add subregion
>
> Why do you want unlocked unplug?  Unplug is rare and complicated; there
> are no performance considerations on one hand, and difficulty of testing
> for lock correctness on the other.  I think it is better if it remains
> protected by the global lock.
>
Oh, yes! I deviated quite far from the original aim, and introduced
some unnecessary complication.

>>
>>> I think it's sufficient to take the mem_map_lock at the beginning of
>>> core_begin() and drop it at the end of core_commit().  That means all
>>> updates of volatile state, phys_map, are protected.
>>>
>> The mem_map_lock is to protect both address_space_io and
>> address_space_memory. When without the protection of big lock,
>> competing will raise among the updaters
>> (memory_region_{add,del}_subregion and the readers
>> generate_memory_topology()->render_memory_region().
>
> These should all run under the big qemu lock, for the same reasons.
> They are rare and not performance sensitive.  Only phys_map reads are
> performance sensitive.
>
OK, I see. Leave the big lock as it is, except for mmio, we will not
worry about it.
>>
>> If just in core_begin/commit, we will duplicate it for
>> xx_begin/commit, right?
>
> No.  Other listeners will be protected by the global lock.
>
Yes, if leave the big lock as it is.
>> And at the same time, mr->subregions is
>> exposed under SMP without big lock.
>>
>
> Who accesses it?
>
Again, I assume the updaters out of the protection of the big lock

> IMO locking should look like:
>
>   phys_map: mem_map_lock
>   dispatch callbacks: device specific lock (or big qemu lock for
> unconverted devices)
>   everything else: big qemu lock
>
I see. Thank you for the review. I will eliminate the unnecessary
complication and effort in the next version.

Regards,
pingfan
>
>
> --
> error compiling committee.c: too many arguments to function
Marcelo Tosatti - Aug. 13, 2012, 6:28 p.m.
On Thu, Aug 09, 2012 at 03:28:44PM +0800, liu ping fan wrote:
> On Wed, Aug 8, 2012 at 5:13 PM, Avi Kivity <avi@redhat.com> wrote:
> > On 08/08/2012 09:25 AM, Liu Ping Fan wrote:
> >> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> >>
> >> Using mem_map_lock to protect among updaters. So we can get the intact
> >> snapshot of mem topology -- FlatView & radix-tree.
> >>
> >> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> >> ---
> >>  exec.c   |    3 +++
> >>  memory.c |   22 ++++++++++++++++++++++
> >>  memory.h |    2 ++
> >>  3 files changed, 27 insertions(+), 0 deletions(-)
> >>
> >> diff --git a/exec.c b/exec.c
> >> index 8244d54..0e29ef9 100644
> >> --- a/exec.c
> >> +++ b/exec.c
> >> @@ -210,6 +210,8 @@ static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
> >>     The bottom level has pointers to MemoryRegionSections.  */
> >>  static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
> >>
> >> +QemuMutex mem_map_lock;
> >> +
> >>  static void io_mem_init(void);
> >>  static void memory_map_init(void);
> >>
> >> @@ -637,6 +639,7 @@ void cpu_exec_init_all(void)
> >>  #if !defined(CONFIG_USER_ONLY)
> >>      memory_map_init();
> >>      io_mem_init();
> >> +    qemu_mutex_init(&mem_map_lock);
> >>  #endif
> >>  }
> >>
> >> diff --git a/memory.c b/memory.c
> >> index aab4a31..5986532 100644
> >> --- a/memory.c
> >> +++ b/memory.c
> >> @@ -761,7 +761,9 @@ void memory_region_transaction_commit(void)
> >>      assert(memory_region_transaction_depth);
> >>      --memory_region_transaction_depth;
> >>      if (!memory_region_transaction_depth && memory_region_update_pending) {
> >> +        qemu_mutex_lock(&mem_map_lock);
> >>          memory_region_update_topology(NULL);
> >> +        qemu_mutex_unlock(&mem_map_lock);
> >>      }
> >>  }
> >
> > Seems to me that nothing in memory.c can susceptible to races.  It must
> > already be called under the big qemu lock, and with the exception of
> > mutators (memory_region_set_*), changes aren't directly visible.
> >
> Yes, what I want to do is "prepare unplug out of protection of global
> lock".  When io-dispatch and mmio-dispatch are all out of big lock, we
> will run into the following scene:
>     In vcpu context A, qdev_unplug_complete()-> delete subregion;
>     In context B, write pci bar --> pci mapping update    -> add subregion

Per device lock should protect that.

Patch

diff --git a/exec.c b/exec.c
index 8244d54..0e29ef9 100644
--- a/exec.c
+++ b/exec.c
@@ -210,6 +210,8 @@  static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
    The bottom level has pointers to MemoryRegionSections.  */
 static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
 
+QemuMutex mem_map_lock;
+
 static void io_mem_init(void);
 static void memory_map_init(void);
 
@@ -637,6 +639,7 @@  void cpu_exec_init_all(void)
 #if !defined(CONFIG_USER_ONLY)
     memory_map_init();
     io_mem_init();
+    qemu_mutex_init(&mem_map_lock);
 #endif
 }
 
diff --git a/memory.c b/memory.c
index aab4a31..5986532 100644
--- a/memory.c
+++ b/memory.c
@@ -761,7 +761,9 @@  void memory_region_transaction_commit(void)
     assert(memory_region_transaction_depth);
     --memory_region_transaction_depth;
     if (!memory_region_transaction_depth && memory_region_update_pending) {
+        qemu_mutex_lock(&mem_map_lock);
         memory_region_update_topology(NULL);
+        qemu_mutex_unlock(&mem_map_lock);
     }
 }
 
@@ -1069,8 +1071,10 @@  void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
 {
     uint8_t mask = 1 << client;
 
+    qemu_mutex_lock(&mem_map_lock);
     mr->dirty_log_mask = (mr->dirty_log_mask & ~mask) | (log * mask);
     memory_region_update_topology(mr);
+    qemu_mutex_unlock(&mem_map_lock);
 }
 
 bool memory_region_get_dirty(MemoryRegion *mr, target_phys_addr_t addr,
@@ -1103,8 +1107,10 @@  void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
 void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
 {
     if (mr->readonly != readonly) {
+        qemu_mutex_lock(&mem_map_lock);
         mr->readonly = readonly;
         memory_region_update_topology(mr);
+        qemu_mutex_unlock(&mem_map_lock);
     }
 }
 
@@ -1112,7 +1118,9 @@  void memory_region_rom_device_set_readable(MemoryRegion *mr, bool readable)
 {
     if (mr->readable != readable) {
         mr->readable = readable;
+        qemu_mutex_lock(&mem_map_lock);
         memory_region_update_topology(mr);
+        qemu_mutex_unlock(&mem_map_lock);
     }
 }
 
@@ -1206,6 +1214,7 @@  void memory_region_add_eventfd(MemoryRegion *mr,
     };
     unsigned i;
 
+    qemu_mutex_lock(&mem_map_lock);
     for (i = 0; i < mr->ioeventfd_nb; ++i) {
         if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
             break;
@@ -1218,6 +1227,7 @@  void memory_region_add_eventfd(MemoryRegion *mr,
             sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i));
     mr->ioeventfds[i] = mrfd;
     memory_region_update_topology(mr);
+    qemu_mutex_unlock(&mem_map_lock);
 }
 
 void memory_region_del_eventfd(MemoryRegion *mr,
@@ -1236,6 +1246,7 @@  void memory_region_del_eventfd(MemoryRegion *mr,
     };
     unsigned i;
 
+    qemu_mutex_lock(&mem_map_lock);
     for (i = 0; i < mr->ioeventfd_nb; ++i) {
         if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
             break;
@@ -1248,6 +1259,7 @@  void memory_region_del_eventfd(MemoryRegion *mr,
     mr->ioeventfds = g_realloc(mr->ioeventfds,
                                   sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1);
     memory_region_update_topology(mr);
+    qemu_mutex_unlock(&mem_map_lock);
 }
 
 static void memory_region_add_subregion_common(MemoryRegion *mr,
@@ -1259,6 +1271,8 @@  static void memory_region_add_subregion_common(MemoryRegion *mr,
     assert(!subregion->parent);
     subregion->parent = mr;
     subregion->addr = offset;
+
+    qemu_mutex_lock(&mem_map_lock);
     QTAILQ_FOREACH(other, &mr->subregions, subregions_link) {
         if (subregion->may_overlap || other->may_overlap) {
             continue;
@@ -1289,6 +1303,7 @@  static void memory_region_add_subregion_common(MemoryRegion *mr,
     QTAILQ_INSERT_TAIL(&mr->subregions, subregion, subregions_link);
 done:
     memory_region_update_topology(mr);
+    qemu_mutex_unlock(&mem_map_lock);
 }
 
 
@@ -1316,8 +1331,11 @@  void memory_region_del_subregion(MemoryRegion *mr,
 {
     assert(subregion->parent == mr);
     subregion->parent = NULL;
+
+    qemu_mutex_lock(&mem_map_lock);
     QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link);
     memory_region_update_topology(mr);
+    qemu_mutex_unlock(&mem_map_lock);
 }
 
 void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
@@ -1325,8 +1343,10 @@  void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
     if (enabled == mr->enabled) {
         return;
     }
+    qemu_mutex_lock(&mem_map_lock);
     mr->enabled = enabled;
     memory_region_update_topology(NULL);
+    qemu_mutex_unlock(&mem_map_lock);
 }
 
 void memory_region_set_address(MemoryRegion *mr, target_phys_addr_t addr)
@@ -1361,7 +1381,9 @@  void memory_region_set_alias_offset(MemoryRegion *mr, target_phys_addr_t offset)
         return;
     }
 
+    qemu_mutex_lock(&mem_map_lock);
     memory_region_update_topology(mr);
+    qemu_mutex_unlock(&mem_map_lock);
 }
 
 ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr)
diff --git a/memory.h b/memory.h
index 740c48e..fe6aefa 100644
--- a/memory.h
+++ b/memory.h
@@ -25,6 +25,7 @@ 
 #include "iorange.h"
 #include "ioport.h"
 #include "int128.h"
+#include "qemu-thread.h"
 
 typedef struct MemoryRegionOps MemoryRegionOps;
 typedef struct MemoryRegion MemoryRegion;
@@ -207,6 +208,7 @@  struct MemoryListener {
     QTAILQ_ENTRY(MemoryListener) link;
 };
 
+extern QemuMutex mem_map_lock;
 /**
  * memory_region_init: Initialize a memory region
  *