diff mbox

[10/15] exec: RCUify AddressSpaceDispatch

Message ID 1421938053-10318-11-git-send-email-pbonzini@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini Jan. 22, 2015, 2:47 p.m. UTC
Note that even after this patch, most callers of address_space_*
functions must still be under the big QEMU lock, otherwise the memory
region returned by address_space_translate can disappear as soon as
address_space_translate returns.  This will be fixed in the next part
of this series.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cpu-exec.c              | 13 ++++++++++++-
 cpus.c                  |  2 +-
 cputlb.c                |  8 ++++++--
 exec.c                  | 34 ++++++++++++++++++++++++++--------
 hw/i386/intel_iommu.c   |  3 +++
 hw/pci-host/apb.c       |  1 +
 hw/ppc/spapr_iommu.c    |  1 +
 include/exec/exec-all.h |  1 +
 8 files changed, 51 insertions(+), 12 deletions(-)

Comments

Fam Zheng Jan. 28, 2015, 5:45 a.m. UTC | #1
On Thu, 01/22 15:47, Paolo Bonzini wrote:
> Note that even after this patch, most callers of address_space_*
> functions must still be under the big QEMU lock, otherwise the memory
> region returned by address_space_translate can disappear as soon as
> address_space_translate returns.  This will be fixed in the next part
> of this series.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  cpu-exec.c              | 13 ++++++++++++-
>  cpus.c                  |  2 +-
>  cputlb.c                |  8 ++++++--
>  exec.c                  | 34 ++++++++++++++++++++++++++--------
>  hw/i386/intel_iommu.c   |  3 +++
>  hw/pci-host/apb.c       |  1 +
>  hw/ppc/spapr_iommu.c    |  1 +
>  include/exec/exec-all.h |  1 +
>  8 files changed, 51 insertions(+), 12 deletions(-)
> 
> diff --git a/cpu-exec.c b/cpu-exec.c
> index 12473ff..edc5eb9 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -26,6 +26,7 @@
>  #include "qemu/timer.h"
>  #include "exec/address-spaces.h"
>  #include "exec/memory-internal.h"
> +#include "qemu/rcu.h"
>  
>  /* -icount align implementation. */
>  
> @@ -149,8 +150,15 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc)
>  
>  void cpu_reload_memory_map(CPUState *cpu)
>  {
> +    AddressSpaceDispatch *d;
> +
> +    if (qemu_in_vcpu_thread()) {
> +        rcu_read_unlock();

Could you explain why we need to split the critical section? Maybe a line of
comment helps here.

> +        rcu_read_lock();
> +    }
> +
>      /* The CPU and TLB are protected by the iothread lock.  */
> -    AddressSpaceDispatch *d = cpu->as->dispatch;
> +    d = atomic_rcu_read(&cpu->as->dispatch);
>      cpu->memory_dispatch = d;
>      tlb_flush(cpu, 1);
>  }
> @@ -365,6 +373,8 @@ int cpu_exec(CPUArchState *env)
>       * an instruction scheduling constraint on modern architectures.  */
>      smp_mb();
>  
> +    rcu_read_lock();
> +
>      if (unlikely(exit_request)) {
>          cpu->exit_request = 1;
>      }
> @@ -567,6 +577,7 @@ int cpu_exec(CPUArchState *env)
>      } /* for(;;) */
>  
>      cc->cpu_exec_exit(cpu);
> +    rcu_read_unlock();
>  
>      /* fail safe : never use current_cpu outside cpu_exec() */
>      current_cpu = NULL;
> diff --git a/cpus.c b/cpus.c
> index 3a5323b..b02c793 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1121,7 +1121,7 @@ bool qemu_cpu_is_self(CPUState *cpu)
>      return qemu_thread_is_self(cpu->thread);
>  }
>  
> -static bool qemu_in_vcpu_thread(void)
> +bool qemu_in_vcpu_thread(void)
>  {
>      return current_cpu && qemu_cpu_is_self(current_cpu);
>  }
> diff --git a/cputlb.c b/cputlb.c
> index f92db5e..38f2151 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -243,8 +243,12 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
>  }
>  
>  /* Add a new TLB entry. At most one entry for a given virtual address
> -   is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
> -   supplied size is only used by tlb_flush_page.  */
> + * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
> + * supplied size is only used by tlb_flush_page.
> + *
> + * Called from TCG-generated code, which is under an RCU read-side
> + * critical section.
> + */
>  void tlb_set_page(CPUState *cpu, target_ulong vaddr,
>                    hwaddr paddr, int prot,
>                    int mmu_idx, target_ulong size)
> diff --git a/exec.c b/exec.c
> index 762ec76..262e8bc 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -115,6 +115,8 @@ struct PhysPageEntry {
>  typedef PhysPageEntry Node[P_L2_SIZE];
>  
>  typedef struct PhysPageMap {
> +    struct rcu_head rcu;
> +
>      unsigned sections_nb;
>      unsigned sections_nb_alloc;
>      unsigned nodes_nb;
> @@ -124,6 +126,8 @@ typedef struct PhysPageMap {
>  } PhysPageMap;
>  
>  struct AddressSpaceDispatch {
> +    struct rcu_head rcu;
> +
>      /* This is a multi-level map on the physical address space.
>       * The bottom level has pointers to MemoryRegionSections.
>       */
> @@ -315,6 +319,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
>          && mr != &io_mem_watch;
>  }
>  
> +/* Called from RCU critical section */
>  static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
>                                                          hwaddr addr,
>                                                          bool resolve_subpage)
> @@ -330,6 +335,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
>      return section;
>  }
>  
> +/* Called from RCU critical section */
>  static MemoryRegionSection *
>  address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
>                                   hwaddr *plen, bool resolve_subpage)
> @@ -370,8 +376,10 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
>      MemoryRegion *mr;
>      hwaddr len = *plen;
>  
> +    rcu_read_lock();
>      for (;;) {
> -        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
> +        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
> +        section = address_space_translate_internal(d, addr, &addr, plen, true);
>          mr = section->mr;
>  
>          if (!mr->iommu_ops) {
> @@ -397,9 +405,11 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
>  
>      *plen = len;
>      *xlat = addr;
> +    rcu_read_unlock();
>      return mr;
>  }
>  
> +/* Called from RCU critical section */
>  MemoryRegionSection *
>  address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
>                                    hwaddr *xlat, hwaddr *plen)
> @@ -852,6 +862,7 @@ static void cpu_physical_memory_set_dirty_tracking(bool enable)
>      in_migration = enable;
>  }
>  
> +/* Called from RCU critical section */
>  hwaddr memory_region_section_get_iotlb(CPUState *cpu,
>                                         MemoryRegionSection *section,
>                                         target_ulong vaddr,
> @@ -1963,7 +1974,8 @@ static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
>  
>  MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
>  {
> -    MemoryRegionSection *sections = cpu->memory_dispatch->map.sections;
> +    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
> +    MemoryRegionSection *sections = d->map.sections;
>  
>      return sections[index & ~TARGET_PAGE_MASK].mr;
>  }
> @@ -1999,6 +2011,12 @@ static void mem_begin(MemoryListener *listener)
>      as->next_dispatch = d;
>  }
>  
> +static void address_space_dispatch_free(AddressSpaceDispatch *d)
> +{
> +    phys_sections_free(&d->map);


If I understand it, this code doesn't hold iothread lock when releasing the
memory region, but in one of the memory region destructors,
memory_region_destructor_ram_from_ptr:

    void qemu_ram_free_from_ptr(ram_addr_t addr)
    {
        RAMBlock *block;

        /* This assumes the iothread lock is taken here too.  */
        qemu_mutex_lock_ramlist();
        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
...

Is the comment stale or I missed something?

Fam

> +    g_free(d);
> +}
> +
>  static void mem_commit(MemoryListener *listener)
>  {
>      AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
> @@ -2007,11 +2025,9 @@ static void mem_commit(MemoryListener *listener)
>  
>      phys_page_compact_all(next, next->map.nodes_nb);
>  
> -    as->dispatch = next;
> -
> +    atomic_rcu_set(&as->dispatch, next);
>      if (cur) {
> -        phys_sections_free(&cur->map);
> -        g_free(cur);
> +        call_rcu(cur, address_space_dispatch_free, rcu);
>      }
>  }
>  
> @@ -2066,8 +2082,10 @@ void address_space_destroy_dispatch(AddressSpace *as)
>      AddressSpaceDispatch *d = as->dispatch;
>  
>      memory_listener_unregister(&as->dispatch_listener);
> -    g_free(d);
> -    as->dispatch = NULL;
> +    atomic_rcu_set(&as->dispatch, NULL);
> +    if (d) {
> +        call_rcu(d, address_space_dispatch_free, rcu);
> +    }
>  }
>  
>  static void memory_map_init(void)
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 0a4282a..7da70ff 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -745,6 +745,9 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
>  
>  /* Map dev to context-entry then do a paging-structures walk to do a iommu
>   * translation.
> + *
> + * Called from RCU critical section.
> + *
>   * @bus_num: The bus number
>   * @devfn: The devfn, which is the  combined of device and function number
>   * @is_write: The access is a write operation
> diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
> index f573875..832b6c7 100644
> --- a/hw/pci-host/apb.c
> +++ b/hw/pci-host/apb.c
> @@ -205,6 +205,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>      return &is->iommu_as;
>  }
>  
> +/* Called from RCU critical section */
>  static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>                                           bool is_write)
>  {
> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> index da47474..ba003da 100644
> --- a/hw/ppc/spapr_iommu.c
> +++ b/hw/ppc/spapr_iommu.c
> @@ -59,6 +59,7 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
>      return NULL;
>  }
>  
> +/* Called from RCU critical section */
>  static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>                                                 bool is_write)
>  {
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index bb3fd37..8eb0db3 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -96,6 +96,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>  void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
>                                int is_cpu_write_access);
>  #if !defined(CONFIG_USER_ONLY)
> +bool qemu_in_vcpu_thread(void);
>  void cpu_reload_memory_map(CPUState *cpu);
>  void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as);
>  /* cputlb.c */
> -- 
> 1.8.3.1
> 
>
Paolo Bonzini Jan. 28, 2015, 9:44 a.m. UTC | #2
On 28/01/2015 06:45, Fam Zheng wrote:
> On Thu, 01/22 15:47, Paolo Bonzini wrote:
>> Note that even after this patch, most callers of address_space_*
>> functions must still be under the big QEMU lock, otherwise the memory
>> region returned by address_space_translate can disappear as soon as
>> address_space_translate returns.  This will be fixed in the next part
>> of this series.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  cpu-exec.c              | 13 ++++++++++++-
>>  cpus.c                  |  2 +-
>>  cputlb.c                |  8 ++++++--
>>  exec.c                  | 34 ++++++++++++++++++++++++++--------
>>  hw/i386/intel_iommu.c   |  3 +++
>>  hw/pci-host/apb.c       |  1 +
>>  hw/ppc/spapr_iommu.c    |  1 +
>>  include/exec/exec-all.h |  1 +
>>  8 files changed, 51 insertions(+), 12 deletions(-)
>>
>> diff --git a/cpu-exec.c b/cpu-exec.c
>> index 12473ff..edc5eb9 100644
>> --- a/cpu-exec.c
>> +++ b/cpu-exec.c
>> @@ -26,6 +26,7 @@
>>  #include "qemu/timer.h"
>>  #include "exec/address-spaces.h"
>>  #include "exec/memory-internal.h"
>> +#include "qemu/rcu.h"
>>  
>>  /* -icount align implementation. */
>>  
>> @@ -149,8 +150,15 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc)
>>  
>>  void cpu_reload_memory_map(CPUState *cpu)
>>  {
>> +    AddressSpaceDispatch *d;
>> +
>> +    if (qemu_in_vcpu_thread()) {
>> +        rcu_read_unlock();
> 
> Could you explain why we need to split the critical section? Maybe a line of
> comment helps here.

It is needed because otherwise the guest could prolong the critical
section as much as it desires.

Currently, this is prevented by the I/O thread's kicking of the VCPU
thread (iothread_requesting_mutex, qemu_cpu_kick_thread) but this will
go away once TCG's execution moves out of the global mutex.

This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which
only protects cpu->as->dispatch.  Since we reload it below, we can split
the critical section.

Paolo

>> +        rcu_read_lock();
>> +    }
>> +
>>      /* The CPU and TLB are protected by the iothread lock.  */
>> -    AddressSpaceDispatch *d = cpu->as->dispatch;
>> +    d = atomic_rcu_read(&cpu->as->dispatch);
>>      cpu->memory_dispatch = d;
>>      tlb_flush(cpu, 1);
>>  }
>> @@ -365,6 +373,8 @@ int cpu_exec(CPUArchState *env)
>>       * an instruction scheduling constraint on modern architectures.  */
>>      smp_mb();
>>  
>> +    rcu_read_lock();
>> +
>>      if (unlikely(exit_request)) {
>>          cpu->exit_request = 1;
>>      }
>> @@ -567,6 +577,7 @@ int cpu_exec(CPUArchState *env)
>>      } /* for(;;) */
>>  
>>      cc->cpu_exec_exit(cpu);
>> +    rcu_read_unlock();
>>  
>>      /* fail safe : never use current_cpu outside cpu_exec() */
>>      current_cpu = NULL;
>> diff --git a/cpus.c b/cpus.c
>> index 3a5323b..b02c793 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -1121,7 +1121,7 @@ bool qemu_cpu_is_self(CPUState *cpu)
>>      return qemu_thread_is_self(cpu->thread);
>>  }
>>  
>> -static bool qemu_in_vcpu_thread(void)
>> +bool qemu_in_vcpu_thread(void)
>>  {
>>      return current_cpu && qemu_cpu_is_self(current_cpu);
>>  }
>> diff --git a/cputlb.c b/cputlb.c
>> index f92db5e..38f2151 100644
>> --- a/cputlb.c
>> +++ b/cputlb.c
>> @@ -243,8 +243,12 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
>>  }
>>  
>>  /* Add a new TLB entry. At most one entry for a given virtual address
>> -   is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
>> -   supplied size is only used by tlb_flush_page.  */
>> + * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
>> + * supplied size is only used by tlb_flush_page.
>> + *
>> + * Called from TCG-generated code, which is under an RCU read-side
>> + * critical section.
>> + */
>>  void tlb_set_page(CPUState *cpu, target_ulong vaddr,
>>                    hwaddr paddr, int prot,
>>                    int mmu_idx, target_ulong size)
>> diff --git a/exec.c b/exec.c
>> index 762ec76..262e8bc 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -115,6 +115,8 @@ struct PhysPageEntry {
>>  typedef PhysPageEntry Node[P_L2_SIZE];
>>  
>>  typedef struct PhysPageMap {
>> +    struct rcu_head rcu;
>> +
>>      unsigned sections_nb;
>>      unsigned sections_nb_alloc;
>>      unsigned nodes_nb;
>> @@ -124,6 +126,8 @@ typedef struct PhysPageMap {
>>  } PhysPageMap;
>>  
>>  struct AddressSpaceDispatch {
>> +    struct rcu_head rcu;
>> +
>>      /* This is a multi-level map on the physical address space.
>>       * The bottom level has pointers to MemoryRegionSections.
>>       */
>> @@ -315,6 +319,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
>>          && mr != &io_mem_watch;
>>  }
>>  
>> +/* Called from RCU critical section */
>>  static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
>>                                                          hwaddr addr,
>>                                                          bool resolve_subpage)
>> @@ -330,6 +335,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
>>      return section;
>>  }
>>  
>> +/* Called from RCU critical section */
>>  static MemoryRegionSection *
>>  address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
>>                                   hwaddr *plen, bool resolve_subpage)
>> @@ -370,8 +376,10 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
>>      MemoryRegion *mr;
>>      hwaddr len = *plen;
>>  
>> +    rcu_read_lock();
>>      for (;;) {
>> -        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
>> +        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
>> +        section = address_space_translate_internal(d, addr, &addr, plen, true);
>>          mr = section->mr;
>>  
>>          if (!mr->iommu_ops) {
>> @@ -397,9 +405,11 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
>>  
>>      *plen = len;
>>      *xlat = addr;
>> +    rcu_read_unlock();
>>      return mr;
>>  }
>>  
>> +/* Called from RCU critical section */
>>  MemoryRegionSection *
>>  address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
>>                                    hwaddr *xlat, hwaddr *plen)
>> @@ -852,6 +862,7 @@ static void cpu_physical_memory_set_dirty_tracking(bool enable)
>>      in_migration = enable;
>>  }
>>  
>> +/* Called from RCU critical section */
>>  hwaddr memory_region_section_get_iotlb(CPUState *cpu,
>>                                         MemoryRegionSection *section,
>>                                         target_ulong vaddr,
>> @@ -1963,7 +1974,8 @@ static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
>>  
>>  MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
>>  {
>> -    MemoryRegionSection *sections = cpu->memory_dispatch->map.sections;
>> +    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
>> +    MemoryRegionSection *sections = d->map.sections;
>>  
>>      return sections[index & ~TARGET_PAGE_MASK].mr;
>>  }
>> @@ -1999,6 +2011,12 @@ static void mem_begin(MemoryListener *listener)
>>      as->next_dispatch = d;
>>  }
>>  
>> +static void address_space_dispatch_free(AddressSpaceDispatch *d)
>> +{
>> +    phys_sections_free(&d->map);
> 
> 
> If I understand it, this code doesn't hold iothread lock when releasing the
> memory region, but in one of the memory region destructors,
> memory_region_destructor_ram_from_ptr:
> 
>     void qemu_ram_free_from_ptr(ram_addr_t addr)
>     {
>         RAMBlock *block;
> 
>         /* This assumes the iothread lock is taken here too.  */
>         qemu_mutex_lock_ramlist();
>         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> ...
> 
> Is the comment stale or I missed something?
> 
> Fam
> 
>> +    g_free(d);
>> +}
>> +
>>  static void mem_commit(MemoryListener *listener)
>>  {
>>      AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
>> @@ -2007,11 +2025,9 @@ static void mem_commit(MemoryListener *listener)
>>  
>>      phys_page_compact_all(next, next->map.nodes_nb);
>>  
>> -    as->dispatch = next;
>> -
>> +    atomic_rcu_set(&as->dispatch, next);
>>      if (cur) {
>> -        phys_sections_free(&cur->map);
>> -        g_free(cur);
>> +        call_rcu(cur, address_space_dispatch_free, rcu);
>>      }
>>  }
>>  
>> @@ -2066,8 +2082,10 @@ void address_space_destroy_dispatch(AddressSpace *as)
>>      AddressSpaceDispatch *d = as->dispatch;
>>  
>>      memory_listener_unregister(&as->dispatch_listener);
>> -    g_free(d);
>> -    as->dispatch = NULL;
>> +    atomic_rcu_set(&as->dispatch, NULL);
>> +    if (d) {
>> +        call_rcu(d, address_space_dispatch_free, rcu);
>> +    }
>>  }
>>  
>>  static void memory_map_init(void)
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 0a4282a..7da70ff 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -745,6 +745,9 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
>>  
>>  /* Map dev to context-entry then do a paging-structures walk to do a iommu
>>   * translation.
>> + *
>> + * Called from RCU critical section.
>> + *
>>   * @bus_num: The bus number
>>   * @devfn: The devfn, which is the  combined of device and function number
>>   * @is_write: The access is a write operation
>> diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
>> index f573875..832b6c7 100644
>> --- a/hw/pci-host/apb.c
>> +++ b/hw/pci-host/apb.c
>> @@ -205,6 +205,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>      return &is->iommu_as;
>>  }
>>  
>> +/* Called from RCU critical section */
>>  static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>>                                           bool is_write)
>>  {
>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>> index da47474..ba003da 100644
>> --- a/hw/ppc/spapr_iommu.c
>> +++ b/hw/ppc/spapr_iommu.c
>> @@ -59,6 +59,7 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
>>      return NULL;
>>  }
>>  
>> +/* Called from RCU critical section */
>>  static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>>                                                 bool is_write)
>>  {
>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
>> index bb3fd37..8eb0db3 100644
>> --- a/include/exec/exec-all.h
>> +++ b/include/exec/exec-all.h
>> @@ -96,6 +96,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>>  void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
>>                                int is_cpu_write_access);
>>  #if !defined(CONFIG_USER_ONLY)
>> +bool qemu_in_vcpu_thread(void);
>>  void cpu_reload_memory_map(CPUState *cpu);
>>  void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as);
>>  /* cputlb.c */
>> -- 
>> 1.8.3.1
>>
>>
Paolo Bonzini Feb. 3, 2015, 10:07 a.m. UTC | #3
On 28/01/2015 06:45, Fam Zheng wrote:
> If I understand it, this code doesn't hold iothread lock when releasing the
> memory region, but in one of the memory region destructors,
> memory_region_destructor_ram_from_ptr:
> 
>     void qemu_ram_free_from_ptr(ram_addr_t addr)
>     {
>         RAMBlock *block;
> 
>         /* This assumes the iothread lock is taken here too.  */
>         qemu_mutex_lock_ramlist();
>         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> ...
> 
> Is the comment stale or I missed something?

No, you're right.

Paolo
diff mbox

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index 12473ff..edc5eb9 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -26,6 +26,7 @@ 
 #include "qemu/timer.h"
 #include "exec/address-spaces.h"
 #include "exec/memory-internal.h"
+#include "qemu/rcu.h"
 
 /* -icount align implementation. */
 
@@ -149,8 +150,15 @@  void cpu_resume_from_signal(CPUState *cpu, void *puc)
 
 void cpu_reload_memory_map(CPUState *cpu)
 {
+    AddressSpaceDispatch *d;
+
+    if (qemu_in_vcpu_thread()) {
+        rcu_read_unlock();
+        rcu_read_lock();
+    }
+
     /* The CPU and TLB are protected by the iothread lock.  */
-    AddressSpaceDispatch *d = cpu->as->dispatch;
+    d = atomic_rcu_read(&cpu->as->dispatch);
     cpu->memory_dispatch = d;
     tlb_flush(cpu, 1);
 }
@@ -365,6 +373,8 @@  int cpu_exec(CPUArchState *env)
      * an instruction scheduling constraint on modern architectures.  */
     smp_mb();
 
+    rcu_read_lock();
+
     if (unlikely(exit_request)) {
         cpu->exit_request = 1;
     }
@@ -567,6 +577,7 @@  int cpu_exec(CPUArchState *env)
     } /* for(;;) */
 
     cc->cpu_exec_exit(cpu);
+    rcu_read_unlock();
 
     /* fail safe : never use current_cpu outside cpu_exec() */
     current_cpu = NULL;
diff --git a/cpus.c b/cpus.c
index 3a5323b..b02c793 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1121,7 +1121,7 @@  bool qemu_cpu_is_self(CPUState *cpu)
     return qemu_thread_is_self(cpu->thread);
 }
 
-static bool qemu_in_vcpu_thread(void)
+bool qemu_in_vcpu_thread(void)
 {
     return current_cpu && qemu_cpu_is_self(current_cpu);
 }
diff --git a/cputlb.c b/cputlb.c
index f92db5e..38f2151 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -243,8 +243,12 @@  static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
 }
 
 /* Add a new TLB entry. At most one entry for a given virtual address
-   is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
-   supplied size is only used by tlb_flush_page.  */
+ * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
+ * supplied size is only used by tlb_flush_page.
+ *
+ * Called from TCG-generated code, which is under an RCU read-side
+ * critical section.
+ */
 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                   hwaddr paddr, int prot,
                   int mmu_idx, target_ulong size)
diff --git a/exec.c b/exec.c
index 762ec76..262e8bc 100644
--- a/exec.c
+++ b/exec.c
@@ -115,6 +115,8 @@  struct PhysPageEntry {
 typedef PhysPageEntry Node[P_L2_SIZE];
 
 typedef struct PhysPageMap {
+    struct rcu_head rcu;
+
     unsigned sections_nb;
     unsigned sections_nb_alloc;
     unsigned nodes_nb;
@@ -124,6 +126,8 @@  typedef struct PhysPageMap {
 } PhysPageMap;
 
 struct AddressSpaceDispatch {
+    struct rcu_head rcu;
+
     /* This is a multi-level map on the physical address space.
      * The bottom level has pointers to MemoryRegionSections.
      */
@@ -315,6 +319,7 @@  bool memory_region_is_unassigned(MemoryRegion *mr)
         && mr != &io_mem_watch;
 }
 
+/* Called from RCU critical section */
 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                         hwaddr addr,
                                                         bool resolve_subpage)
@@ -330,6 +335,7 @@  static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
     return section;
 }
 
+/* Called from RCU critical section */
 static MemoryRegionSection *
 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                  hwaddr *plen, bool resolve_subpage)
@@ -370,8 +376,10 @@  MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
     MemoryRegion *mr;
     hwaddr len = *plen;
 
+    rcu_read_lock();
     for (;;) {
-        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
+        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
+        section = address_space_translate_internal(d, addr, &addr, plen, true);
         mr = section->mr;
 
         if (!mr->iommu_ops) {
@@ -397,9 +405,11 @@  MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 
     *plen = len;
     *xlat = addr;
+    rcu_read_unlock();
     return mr;
 }
 
+/* Called from RCU critical section */
 MemoryRegionSection *
 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
                                   hwaddr *xlat, hwaddr *plen)
@@ -852,6 +862,7 @@  static void cpu_physical_memory_set_dirty_tracking(bool enable)
     in_migration = enable;
 }
 
+/* Called from RCU critical section */
 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                        MemoryRegionSection *section,
                                        target_ulong vaddr,
@@ -1963,7 +1974,8 @@  static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
 
 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
 {
-    MemoryRegionSection *sections = cpu->memory_dispatch->map.sections;
+    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
+    MemoryRegionSection *sections = d->map.sections;
 
     return sections[index & ~TARGET_PAGE_MASK].mr;
 }
@@ -1999,6 +2011,12 @@  static void mem_begin(MemoryListener *listener)
     as->next_dispatch = d;
 }
 
+static void address_space_dispatch_free(AddressSpaceDispatch *d)
+{
+    phys_sections_free(&d->map);
+    g_free(d);
+}
+
 static void mem_commit(MemoryListener *listener)
 {
     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
@@ -2007,11 +2025,9 @@  static void mem_commit(MemoryListener *listener)
 
     phys_page_compact_all(next, next->map.nodes_nb);
 
-    as->dispatch = next;
-
+    atomic_rcu_set(&as->dispatch, next);
     if (cur) {
-        phys_sections_free(&cur->map);
-        g_free(cur);
+        call_rcu(cur, address_space_dispatch_free, rcu);
     }
 }
 
@@ -2066,8 +2082,10 @@  void address_space_destroy_dispatch(AddressSpace *as)
     AddressSpaceDispatch *d = as->dispatch;
 
     memory_listener_unregister(&as->dispatch_listener);
-    g_free(d);
-    as->dispatch = NULL;
+    atomic_rcu_set(&as->dispatch, NULL);
+    if (d) {
+        call_rcu(d, address_space_dispatch_free, rcu);
+    }
 }
 
 static void memory_map_init(void)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 0a4282a..7da70ff 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -745,6 +745,9 @@  static inline bool vtd_is_interrupt_addr(hwaddr addr)
 
 /* Map dev to context-entry then do a paging-structures walk to do a iommu
  * translation.
+ *
+ * Called from RCU critical section.
+ *
  * @bus_num: The bus number
  * @devfn: The devfn, which is the  combined of device and function number
  * @is_write: The access is a write operation
diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
index f573875..832b6c7 100644
--- a/hw/pci-host/apb.c
+++ b/hw/pci-host/apb.c
@@ -205,6 +205,7 @@  static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &is->iommu_as;
 }
 
+/* Called from RCU critical section */
 static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr,
                                          bool is_write)
 {
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index da47474..ba003da 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -59,6 +59,7 @@  static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
     return NULL;
 }
 
+/* Called from RCU critical section */
 static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
                                                bool is_write)
 {
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index bb3fd37..8eb0db3 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -96,6 +96,7 @@  void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
                               int is_cpu_write_access);
 #if !defined(CONFIG_USER_ONLY)
+bool qemu_in_vcpu_thread(void);
 void cpu_reload_memory_map(CPUState *cpu);
 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as);
 /* cputlb.c */