Message ID | 1421938053-10318-11-git-send-email-pbonzini@redhat.com |
---|---|
State | New |
Headers | show |
On Thu, 01/22 15:47, Paolo Bonzini wrote: > Note that even after this patch, most callers of address_space_* > functions must still be under the big QEMU lock, otherwise the memory > region returned by address_space_translate can disappear as soon as > address_space_translate returns. This will be fixed in the next part > of this series. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > cpu-exec.c | 13 ++++++++++++- > cpus.c | 2 +- > cputlb.c | 8 ++++++-- > exec.c | 34 ++++++++++++++++++++++++++-------- > hw/i386/intel_iommu.c | 3 +++ > hw/pci-host/apb.c | 1 + > hw/ppc/spapr_iommu.c | 1 + > include/exec/exec-all.h | 1 + > 8 files changed, 51 insertions(+), 12 deletions(-) > > diff --git a/cpu-exec.c b/cpu-exec.c > index 12473ff..edc5eb9 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -26,6 +26,7 @@ > #include "qemu/timer.h" > #include "exec/address-spaces.h" > #include "exec/memory-internal.h" > +#include "qemu/rcu.h" > > /* -icount align implementation. */ > > @@ -149,8 +150,15 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc) > > void cpu_reload_memory_map(CPUState *cpu) > { > + AddressSpaceDispatch *d; > + > + if (qemu_in_vcpu_thread()) { > + rcu_read_unlock(); Could you explain why we need to split the critical section? Maybe a line of comment helps here. > + rcu_read_lock(); > + } > + > /* The CPU and TLB are protected by the iothread lock. */ > - AddressSpaceDispatch *d = cpu->as->dispatch; > + d = atomic_rcu_read(&cpu->as->dispatch); > cpu->memory_dispatch = d; > tlb_flush(cpu, 1); > } > @@ -365,6 +373,8 @@ int cpu_exec(CPUArchState *env) > * an instruction scheduling constraint on modern architectures. 
*/ > smp_mb(); > > + rcu_read_lock(); > + > if (unlikely(exit_request)) { > cpu->exit_request = 1; > } > @@ -567,6 +577,7 @@ int cpu_exec(CPUArchState *env) > } /* for(;;) */ > > cc->cpu_exec_exit(cpu); > + rcu_read_unlock(); > > /* fail safe : never use current_cpu outside cpu_exec() */ > current_cpu = NULL; > diff --git a/cpus.c b/cpus.c > index 3a5323b..b02c793 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -1121,7 +1121,7 @@ bool qemu_cpu_is_self(CPUState *cpu) > return qemu_thread_is_self(cpu->thread); > } > > -static bool qemu_in_vcpu_thread(void) > +bool qemu_in_vcpu_thread(void) > { > return current_cpu && qemu_cpu_is_self(current_cpu); > } > diff --git a/cputlb.c b/cputlb.c > index f92db5e..38f2151 100644 > --- a/cputlb.c > +++ b/cputlb.c > @@ -243,8 +243,12 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr, > } > > /* Add a new TLB entry. At most one entry for a given virtual address > - is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the > - supplied size is only used by tlb_flush_page. */ > + * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the > + * supplied size is only used by tlb_flush_page. > + * > + * Called from TCG-generated code, which is under an RCU read-side > + * critical section. > + */ > void tlb_set_page(CPUState *cpu, target_ulong vaddr, > hwaddr paddr, int prot, > int mmu_idx, target_ulong size) > diff --git a/exec.c b/exec.c > index 762ec76..262e8bc 100644 > --- a/exec.c > +++ b/exec.c > @@ -115,6 +115,8 @@ struct PhysPageEntry { > typedef PhysPageEntry Node[P_L2_SIZE]; > > typedef struct PhysPageMap { > + struct rcu_head rcu; > + > unsigned sections_nb; > unsigned sections_nb_alloc; > unsigned nodes_nb; > @@ -124,6 +126,8 @@ typedef struct PhysPageMap { > } PhysPageMap; > > struct AddressSpaceDispatch { > + struct rcu_head rcu; > + > /* This is a multi-level map on the physical address space. > * The bottom level has pointers to MemoryRegionSections. 
> */ > @@ -315,6 +319,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr) > && mr != &io_mem_watch; > } > > +/* Called from RCU critical section */ > static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, > hwaddr addr, > bool resolve_subpage) > @@ -330,6 +335,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, > return section; > } > > +/* Called from RCU critical section */ > static MemoryRegionSection * > address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat, > hwaddr *plen, bool resolve_subpage) > @@ -370,8 +376,10 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, > MemoryRegion *mr; > hwaddr len = *plen; > > + rcu_read_lock(); > for (;;) { > - section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true); > + AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch); > + section = address_space_translate_internal(d, addr, &addr, plen, true); > mr = section->mr; > > if (!mr->iommu_ops) { > @@ -397,9 +405,11 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, > > *plen = len; > *xlat = addr; > + rcu_read_unlock(); > return mr; > } > > +/* Called from RCU critical section */ > MemoryRegionSection * > address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr, > hwaddr *xlat, hwaddr *plen) > @@ -852,6 +862,7 @@ static void cpu_physical_memory_set_dirty_tracking(bool enable) > in_migration = enable; > } > > +/* Called from RCU critical section */ > hwaddr memory_region_section_get_iotlb(CPUState *cpu, > MemoryRegionSection *section, > target_ulong vaddr, > @@ -1963,7 +1974,8 @@ static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as, > > MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index) > { > - MemoryRegionSection *sections = cpu->memory_dispatch->map.sections; > + AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch); > + MemoryRegionSection *sections = d->map.sections; > > return 
sections[index & ~TARGET_PAGE_MASK].mr; > } > @@ -1999,6 +2011,12 @@ static void mem_begin(MemoryListener *listener) > as->next_dispatch = d; > } > > +static void address_space_dispatch_free(AddressSpaceDispatch *d) > +{ > + phys_sections_free(&d->map); If I understand it, this code doesn't hold iothread lock when releasing the memory region, but in one of the memroy region destructors, memory_region_destructor_ram_from_ptr: void qemu_ram_free_from_ptr(ram_addr_t addr) { RAMBlock *block; /* This assumes the iothread lock is taken here too. */ qemu_mutex_lock_ramlist(); QTAILQ_FOREACH(block, &ram_list.blocks, next) { ... Is the comment stale or I missed something? Fam > + g_free(d); > +} > + > static void mem_commit(MemoryListener *listener) > { > AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener); > @@ -2007,11 +2025,9 @@ static void mem_commit(MemoryListener *listener) > > phys_page_compact_all(next, next->map.nodes_nb); > > - as->dispatch = next; > - > + atomic_rcu_set(&as->dispatch, next); > if (cur) { > - phys_sections_free(&cur->map); > - g_free(cur); > + call_rcu(cur, address_space_dispatch_free, rcu); > } > } > > @@ -2066,8 +2082,10 @@ void address_space_destroy_dispatch(AddressSpace *as) > AddressSpaceDispatch *d = as->dispatch; > > memory_listener_unregister(&as->dispatch_listener); > - g_free(d); > - as->dispatch = NULL; > + atomic_rcu_set(&as->dispatch, NULL); > + if (d) { > + call_rcu(d, address_space_dispatch_free, rcu); > + } > } > > static void memory_map_init(void) > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index 0a4282a..7da70ff 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -745,6 +745,9 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) > > /* Map dev to context-entry then do a paging-structures walk to do a iommu > * translation. > + * > + * Called from RCU critical section. 
> + * > * @bus_num: The bus number > * @devfn: The devfn, which is the combined of device and function number > * @is_write: The access is a write operation > diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c > index f573875..832b6c7 100644 > --- a/hw/pci-host/apb.c > +++ b/hw/pci-host/apb.c > @@ -205,6 +205,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) > return &is->iommu_as; > } > > +/* Called from RCU critical section */ > static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr, > bool is_write) > { > diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c > index da47474..ba003da 100644 > --- a/hw/ppc/spapr_iommu.c > +++ b/hw/ppc/spapr_iommu.c > @@ -59,6 +59,7 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) > return NULL; > } > > +/* Called from RCU critical section */ > static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, > bool is_write) > { > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index bb3fd37..8eb0db3 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -96,6 +96,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, > void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, > int is_cpu_write_access); > #if !defined(CONFIG_USER_ONLY) > +bool qemu_in_vcpu_thread(void); > void cpu_reload_memory_map(CPUState *cpu); > void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as); > /* cputlb.c */ > -- > 1.8.3.1 > >
On 28/01/2015 06:45, Fam Zheng wrote: > On Thu, 01/22 15:47, Paolo Bonzini wrote: >> Note that even after this patch, most callers of address_space_* >> functions must still be under the big QEMU lock, otherwise the memory >> region returned by address_space_translate can disappear as soon as >> address_space_translate returns. This will be fixed in the next part >> of this series. >> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >> --- >> cpu-exec.c | 13 ++++++++++++- >> cpus.c | 2 +- >> cputlb.c | 8 ++++++-- >> exec.c | 34 ++++++++++++++++++++++++++-------- >> hw/i386/intel_iommu.c | 3 +++ >> hw/pci-host/apb.c | 1 + >> hw/ppc/spapr_iommu.c | 1 + >> include/exec/exec-all.h | 1 + >> 8 files changed, 51 insertions(+), 12 deletions(-) >> >> diff --git a/cpu-exec.c b/cpu-exec.c >> index 12473ff..edc5eb9 100644 >> --- a/cpu-exec.c >> +++ b/cpu-exec.c >> @@ -26,6 +26,7 @@ >> #include "qemu/timer.h" >> #include "exec/address-spaces.h" >> #include "exec/memory-internal.h" >> +#include "qemu/rcu.h" >> >> /* -icount align implementation. */ >> >> @@ -149,8 +150,15 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc) >> >> void cpu_reload_memory_map(CPUState *cpu) >> { >> + AddressSpaceDispatch *d; >> + >> + if (qemu_in_vcpu_thread()) { >> + rcu_read_unlock(); > > Could you explain why we need to split the critical section? Maybe a line of > comment helps here. It is needed because otherwise the guest could prolong the critical section as much as it desires. Currently, this is prevented by the I/O thread's kicking of the VCPU thread (iothread_requesting_mutex, qemu_cpu_kick_thread) but this will go away once TCG's execution moves out of the global mutex. This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which only protects cpu->as->dispatch. Since we reload it below, we can split the critical section. Paolo >> + rcu_read_lock(); >> + } >> + >> /* The CPU and TLB are protected by the iothread lock. 
*/ >> - AddressSpaceDispatch *d = cpu->as->dispatch; >> + d = atomic_rcu_read(&cpu->as->dispatch); >> cpu->memory_dispatch = d; >> tlb_flush(cpu, 1); >> } >> @@ -365,6 +373,8 @@ int cpu_exec(CPUArchState *env) >> * an instruction scheduling constraint on modern architectures. */ >> smp_mb(); >> >> + rcu_read_lock(); >> + >> if (unlikely(exit_request)) { >> cpu->exit_request = 1; >> } >> @@ -567,6 +577,7 @@ int cpu_exec(CPUArchState *env) >> } /* for(;;) */ >> >> cc->cpu_exec_exit(cpu); >> + rcu_read_unlock(); >> >> /* fail safe : never use current_cpu outside cpu_exec() */ >> current_cpu = NULL; >> diff --git a/cpus.c b/cpus.c >> index 3a5323b..b02c793 100644 >> --- a/cpus.c >> +++ b/cpus.c >> @@ -1121,7 +1121,7 @@ bool qemu_cpu_is_self(CPUState *cpu) >> return qemu_thread_is_self(cpu->thread); >> } >> >> -static bool qemu_in_vcpu_thread(void) >> +bool qemu_in_vcpu_thread(void) >> { >> return current_cpu && qemu_cpu_is_self(current_cpu); >> } >> diff --git a/cputlb.c b/cputlb.c >> index f92db5e..38f2151 100644 >> --- a/cputlb.c >> +++ b/cputlb.c >> @@ -243,8 +243,12 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr, >> } >> >> /* Add a new TLB entry. At most one entry for a given virtual address >> - is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the >> - supplied size is only used by tlb_flush_page. */ >> + * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the >> + * supplied size is only used by tlb_flush_page. >> + * >> + * Called from TCG-generated code, which is under an RCU read-side >> + * critical section. 
>> + */ >> void tlb_set_page(CPUState *cpu, target_ulong vaddr, >> hwaddr paddr, int prot, >> int mmu_idx, target_ulong size) >> diff --git a/exec.c b/exec.c >> index 762ec76..262e8bc 100644 >> --- a/exec.c >> +++ b/exec.c >> @@ -115,6 +115,8 @@ struct PhysPageEntry { >> typedef PhysPageEntry Node[P_L2_SIZE]; >> >> typedef struct PhysPageMap { >> + struct rcu_head rcu; >> + >> unsigned sections_nb; >> unsigned sections_nb_alloc; >> unsigned nodes_nb; >> @@ -124,6 +126,8 @@ typedef struct PhysPageMap { >> } PhysPageMap; >> >> struct AddressSpaceDispatch { >> + struct rcu_head rcu; >> + >> /* This is a multi-level map on the physical address space. >> * The bottom level has pointers to MemoryRegionSections. >> */ >> @@ -315,6 +319,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr) >> && mr != &io_mem_watch; >> } >> >> +/* Called from RCU critical section */ >> static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, >> hwaddr addr, >> bool resolve_subpage) >> @@ -330,6 +335,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, >> return section; >> } >> >> +/* Called from RCU critical section */ >> static MemoryRegionSection * >> address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat, >> hwaddr *plen, bool resolve_subpage) >> @@ -370,8 +376,10 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, >> MemoryRegion *mr; >> hwaddr len = *plen; >> >> + rcu_read_lock(); >> for (;;) { >> - section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true); >> + AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch); >> + section = address_space_translate_internal(d, addr, &addr, plen, true); >> mr = section->mr; >> >> if (!mr->iommu_ops) { >> @@ -397,9 +405,11 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, >> >> *plen = len; >> *xlat = addr; >> + rcu_read_unlock(); >> return mr; >> } >> >> +/* Called from RCU critical section 
*/ >> MemoryRegionSection * >> address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr, >> hwaddr *xlat, hwaddr *plen) >> @@ -852,6 +862,7 @@ static void cpu_physical_memory_set_dirty_tracking(bool enable) >> in_migration = enable; >> } >> >> +/* Called from RCU critical section */ >> hwaddr memory_region_section_get_iotlb(CPUState *cpu, >> MemoryRegionSection *section, >> target_ulong vaddr, >> @@ -1963,7 +1974,8 @@ static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as, >> >> MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index) >> { >> - MemoryRegionSection *sections = cpu->memory_dispatch->map.sections; >> + AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch); >> + MemoryRegionSection *sections = d->map.sections; >> >> return sections[index & ~TARGET_PAGE_MASK].mr; >> } >> @@ -1999,6 +2011,12 @@ static void mem_begin(MemoryListener *listener) >> as->next_dispatch = d; >> } >> >> +static void address_space_dispatch_free(AddressSpaceDispatch *d) >> +{ >> + phys_sections_free(&d->map); > > > If I understand it, this code doesn't hold iothread lock when releasing the > memory region, but in one of the memroy region destructors, > memory_region_destructor_ram_from_ptr: > > void qemu_ram_free_from_ptr(ram_addr_t addr) > { > RAMBlock *block; > > /* This assumes the iothread lock is taken here too. */ > qemu_mutex_lock_ramlist(); > QTAILQ_FOREACH(block, &ram_list.blocks, next) { > ... > > Is the comment stale or I missed something? 
> > Fam > >> + g_free(d); >> +} >> + >> static void mem_commit(MemoryListener *listener) >> { >> AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener); >> @@ -2007,11 +2025,9 @@ static void mem_commit(MemoryListener *listener) >> >> phys_page_compact_all(next, next->map.nodes_nb); >> >> - as->dispatch = next; >> - >> + atomic_rcu_set(&as->dispatch, next); >> if (cur) { >> - phys_sections_free(&cur->map); >> - g_free(cur); >> + call_rcu(cur, address_space_dispatch_free, rcu); >> } >> } >> >> @@ -2066,8 +2082,10 @@ void address_space_destroy_dispatch(AddressSpace *as) >> AddressSpaceDispatch *d = as->dispatch; >> >> memory_listener_unregister(&as->dispatch_listener); >> - g_free(d); >> - as->dispatch = NULL; >> + atomic_rcu_set(&as->dispatch, NULL); >> + if (d) { >> + call_rcu(d, address_space_dispatch_free, rcu); >> + } >> } >> >> static void memory_map_init(void) >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index 0a4282a..7da70ff 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -745,6 +745,9 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) >> >> /* Map dev to context-entry then do a paging-structures walk to do a iommu >> * translation. >> + * >> + * Called from RCU critical section. 
>> + * >> * @bus_num: The bus number >> * @devfn: The devfn, which is the combined of device and function number >> * @is_write: The access is a write operation >> diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c >> index f573875..832b6c7 100644 >> --- a/hw/pci-host/apb.c >> +++ b/hw/pci-host/apb.c >> @@ -205,6 +205,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) >> return &is->iommu_as; >> } >> >> +/* Called from RCU critical section */ >> static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr, >> bool is_write) >> { >> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c >> index da47474..ba003da 100644 >> --- a/hw/ppc/spapr_iommu.c >> +++ b/hw/ppc/spapr_iommu.c >> @@ -59,6 +59,7 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) >> return NULL; >> } >> >> +/* Called from RCU critical section */ >> static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, >> bool is_write) >> { >> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h >> index bb3fd37..8eb0db3 100644 >> --- a/include/exec/exec-all.h >> +++ b/include/exec/exec-all.h >> @@ -96,6 +96,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, >> void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, >> int is_cpu_write_access); >> #if !defined(CONFIG_USER_ONLY) >> +bool qemu_in_vcpu_thread(void); >> void cpu_reload_memory_map(CPUState *cpu); >> void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as); >> /* cputlb.c */ >> -- >> 1.8.3.1 >> >>
On 28/01/2015 06:45, Fam Zheng wrote: > If I understand it, this code doesn't hold iothread lock when releasing the > memory region, but in one of the memory region destructors, > memory_region_destructor_ram_from_ptr: > > void qemu_ram_free_from_ptr(ram_addr_t addr) > { > RAMBlock *block; > > /* This assumes the iothread lock is taken here too. */ > qemu_mutex_lock_ramlist(); > QTAILQ_FOREACH(block, &ram_list.blocks, next) { > ... > > Is the comment stale or I missed something? No, you're right. Paolo
diff --git a/cpu-exec.c b/cpu-exec.c index 12473ff..edc5eb9 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -26,6 +26,7 @@ #include "qemu/timer.h" #include "exec/address-spaces.h" #include "exec/memory-internal.h" +#include "qemu/rcu.h" /* -icount align implementation. */ @@ -149,8 +150,15 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc) void cpu_reload_memory_map(CPUState *cpu) { + AddressSpaceDispatch *d; + + if (qemu_in_vcpu_thread()) { + rcu_read_unlock(); + rcu_read_lock(); + } + /* The CPU and TLB are protected by the iothread lock. */ - AddressSpaceDispatch *d = cpu->as->dispatch; + d = atomic_rcu_read(&cpu->as->dispatch); cpu->memory_dispatch = d; tlb_flush(cpu, 1); } @@ -365,6 +373,8 @@ int cpu_exec(CPUArchState *env) * an instruction scheduling constraint on modern architectures. */ smp_mb(); + rcu_read_lock(); + if (unlikely(exit_request)) { cpu->exit_request = 1; } @@ -567,6 +577,7 @@ int cpu_exec(CPUArchState *env) } /* for(;;) */ cc->cpu_exec_exit(cpu); + rcu_read_unlock(); /* fail safe : never use current_cpu outside cpu_exec() */ current_cpu = NULL; diff --git a/cpus.c b/cpus.c index 3a5323b..b02c793 100644 --- a/cpus.c +++ b/cpus.c @@ -1121,7 +1121,7 @@ bool qemu_cpu_is_self(CPUState *cpu) return qemu_thread_is_self(cpu->thread); } -static bool qemu_in_vcpu_thread(void) +bool qemu_in_vcpu_thread(void) { return current_cpu && qemu_cpu_is_self(current_cpu); } diff --git a/cputlb.c b/cputlb.c index f92db5e..38f2151 100644 --- a/cputlb.c +++ b/cputlb.c @@ -243,8 +243,12 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr, } /* Add a new TLB entry. At most one entry for a given virtual address - is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the - supplied size is only used by tlb_flush_page. */ + * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the + * supplied size is only used by tlb_flush_page. + * + * Called from TCG-generated code, which is under an RCU read-side + * critical section. 
+ */ void tlb_set_page(CPUState *cpu, target_ulong vaddr, hwaddr paddr, int prot, int mmu_idx, target_ulong size) diff --git a/exec.c b/exec.c index 762ec76..262e8bc 100644 --- a/exec.c +++ b/exec.c @@ -115,6 +115,8 @@ struct PhysPageEntry { typedef PhysPageEntry Node[P_L2_SIZE]; typedef struct PhysPageMap { + struct rcu_head rcu; + unsigned sections_nb; unsigned sections_nb_alloc; unsigned nodes_nb; @@ -124,6 +126,8 @@ typedef struct PhysPageMap { } PhysPageMap; struct AddressSpaceDispatch { + struct rcu_head rcu; + /* This is a multi-level map on the physical address space. * The bottom level has pointers to MemoryRegionSections. */ @@ -315,6 +319,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr) && mr != &io_mem_watch; } +/* Called from RCU critical section */ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, hwaddr addr, bool resolve_subpage) @@ -330,6 +335,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, return section; } +/* Called from RCU critical section */ static MemoryRegionSection * address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat, hwaddr *plen, bool resolve_subpage) @@ -370,8 +376,10 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, MemoryRegion *mr; hwaddr len = *plen; + rcu_read_lock(); for (;;) { - section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true); + AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch); + section = address_space_translate_internal(d, addr, &addr, plen, true); mr = section->mr; if (!mr->iommu_ops) { @@ -397,9 +405,11 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, *plen = len; *xlat = addr; + rcu_read_unlock(); return mr; } +/* Called from RCU critical section */ MemoryRegionSection * address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr, hwaddr *xlat, hwaddr *plen) @@ -852,6 +862,7 @@ static void 
cpu_physical_memory_set_dirty_tracking(bool enable) in_migration = enable; } +/* Called from RCU critical section */ hwaddr memory_region_section_get_iotlb(CPUState *cpu, MemoryRegionSection *section, target_ulong vaddr, @@ -1963,7 +1974,8 @@ static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as, MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index) { - MemoryRegionSection *sections = cpu->memory_dispatch->map.sections; + AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch); + MemoryRegionSection *sections = d->map.sections; return sections[index & ~TARGET_PAGE_MASK].mr; } @@ -1999,6 +2011,12 @@ static void mem_begin(MemoryListener *listener) as->next_dispatch = d; } +static void address_space_dispatch_free(AddressSpaceDispatch *d) +{ + phys_sections_free(&d->map); + g_free(d); +} + static void mem_commit(MemoryListener *listener) { AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener); @@ -2007,11 +2025,9 @@ static void mem_commit(MemoryListener *listener) phys_page_compact_all(next, next->map.nodes_nb); - as->dispatch = next; - + atomic_rcu_set(&as->dispatch, next); if (cur) { - phys_sections_free(&cur->map); - g_free(cur); + call_rcu(cur, address_space_dispatch_free, rcu); } } @@ -2066,8 +2082,10 @@ void address_space_destroy_dispatch(AddressSpace *as) AddressSpaceDispatch *d = as->dispatch; memory_listener_unregister(&as->dispatch_listener); - g_free(d); - as->dispatch = NULL; + atomic_rcu_set(&as->dispatch, NULL); + if (d) { + call_rcu(d, address_space_dispatch_free, rcu); + } } static void memory_map_init(void) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 0a4282a..7da70ff 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -745,6 +745,9 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) /* Map dev to context-entry then do a paging-structures walk to do a iommu * translation. + * + * Called from RCU critical section. 
+ * * @bus_num: The bus number * @devfn: The devfn, which is the combined of device and function number * @is_write: The access is a write operation diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index f573875..832b6c7 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -205,6 +205,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &is->iommu_as; } +/* Called from RCU critical section */ static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) { diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index da47474..ba003da 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -59,6 +59,7 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) return NULL; } +/* Called from RCU critical section */ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) { diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index bb3fd37..8eb0db3 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -96,6 +96,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access); #if !defined(CONFIG_USER_ONLY) +bool qemu_in_vcpu_thread(void); void cpu_reload_memory_map(CPUState *cpu); void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as); /* cputlb.c */
Note that even after this patch, most callers of address_space_* functions must still be under the big QEMU lock, otherwise the memory region returned by address_space_translate can disappear as soon as address_space_translate returns. This will be fixed in the next part of this series. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- cpu-exec.c | 13 ++++++++++++- cpus.c | 2 +- cputlb.c | 8 ++++++-- exec.c | 34 ++++++++++++++++++++++++++-------- hw/i386/intel_iommu.c | 3 +++ hw/pci-host/apb.c | 1 + hw/ppc/spapr_iommu.c | 1 + include/exec/exec-all.h | 1 + 8 files changed, 51 insertions(+), 12 deletions(-)