Message ID | 1350897839-29593-8-git-send-email-pingfank@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
On 2012-10-22 11:23, Liu Ping Fan wrote: > Without biglock, we try to protect the mr by increase refcnt. > If we can inc refcnt, go backward and resort to biglock. > > Another point is memory radix-tree can be flushed by another > thread, so we should get the copy of terminal mr to survive > from such issue. > > Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com> > --- > exec.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- > 1 files changed, 117 insertions(+), 8 deletions(-) > > diff --git a/exec.c b/exec.c > index 5834766..91b859b 100644 > --- a/exec.c > +++ b/exec.c > @@ -200,6 +200,8 @@ struct PhysPageEntry { > uint16_t ptr : 15; > }; > > +static QemuMutex mem_map_lock; > + > /* Simple allocator for PhysPageEntry nodes */ > static PhysPageEntry (*phys_map_nodes)[L2_SIZE]; > static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc; > @@ -212,6 +214,8 @@ static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 }; > > static void io_mem_init(void); > static void memory_map_init(void); > +static int phys_page_lookup(target_phys_addr_t addr, MemoryRegionSection *mrs); > + > > static MemoryRegion io_mem_watch; > #endif > @@ -2245,6 +2249,7 @@ static void register_subpage(MemoryRegionSection *section) > subpage_t *subpage; > target_phys_addr_t base = section->offset_within_address_space > & TARGET_PAGE_MASK; > + /* Already under the protection of mem_map_lock */ > MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS); > MemoryRegionSection subsection = { > .offset_within_address_space = base, > @@ -3165,6 +3170,8 @@ static void io_mem_init(void) > > static void core_begin(MemoryListener *listener) > { > + /* protect the updating process of mrs in memory core agaist readers */ > + qemu_mutex_lock(&mem_map_lock); > destroy_all_mappings(); > phys_sections_clear(); > phys_map.ptr = PHYS_MAP_NODE_NIL; > @@ -3184,17 +3191,32 @@ static void core_commit(MemoryListener *listener) > for(env = first_cpu; env != 
NULL; env = env->next_cpu) { > tlb_flush(env, 1); > } > + qemu_mutex_unlock(&mem_map_lock); > } > > static void core_region_add(MemoryListener *listener, > MemoryRegionSection *section) > { > + MemoryRegion *mr = section->mr; > + > + if (mr->ops) { > + if (mr->ops->ref) { if (mr->ops && mr->ops->ref) { here and in the cases below. Unless we avoid that callback anyway, turning it into an mr flag. > + mr->ops->ref(mr); > + } > + } > cpu_register_physical_memory_log(section, section->readonly); > } > > static void core_region_del(MemoryListener *listener, > MemoryRegionSection *section) > { > + MemoryRegion *mr = section->mr; > + > + if (mr->ops) { > + if (mr->ops->unref) { > + mr->ops->unref(mr); > + } > + } > } > > static void core_region_nop(MemoryListener *listener, > @@ -3348,6 +3370,8 @@ static void memory_map_init(void) > memory_region_init(system_io, "io", 65536); > set_system_io_map(system_io); > > + qemu_mutex_init(&mem_map_lock); > + > memory_listener_register(&core_memory_listener, system_memory); > memory_listener_register(&io_memory_listener, system_io); > } > @@ -3406,6 +3430,58 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr, > } > > #else > + > +static MemoryRegionSection *subpage_get_terminal(subpage_t *mmio, > + target_phys_addr_t addr) > +{ > + MemoryRegionSection *section; > + unsigned int idx = SUBPAGE_IDX(addr); > + > + section = &phys_sections[mmio->sub_section[idx]]; > + return section; > +} > + > +static int memory_region_section_ref(MemoryRegionSection *mrs) > +{ > + MemoryRegion *mr; > + int ret = 0; > + > + mr = mrs->mr; > + if (mr->ops) { > + if (mr->ops->ref) { > + ret = mr->ops->ref(mr); > + } > + } > + return ret; The return type should be bool, delivering true if reference was successful. 
> +} > + > +static void memory_region_section_unref(MemoryRegionSection *mrs) > +{ > + MemoryRegion *mr; > + > + mr = mrs->mr; > + if (mr->ops) { > + if (mr->ops->unref) { > + mr->ops->unref(mr); > + } > + } > +} > + > +static int phys_page_lookup(target_phys_addr_t addr, MemoryRegionSection *mrs) > +{ > + MemoryRegionSection *section; > + int ret; > + > + section = phys_page_find(addr >> TARGET_PAGE_BITS); > + if (section->mr->subpage) { > + section = subpage_get_terminal(section->mr->opaque, addr); > + } > + *mrs = *section; > + ret = memory_region_section_ref(mrs); > + > + return ret; > +} > + > void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, > int len, int is_write) > { > @@ -3413,14 +3489,28 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, > uint8_t *ptr; > uint32_t val; > target_phys_addr_t page; > - MemoryRegionSection *section; > + MemoryRegionSection *section, obj_mrs; > + int safe_ref; > > while (len > 0) { > page = addr & TARGET_PAGE_MASK; > l = (page + TARGET_PAGE_SIZE) - addr; > if (l > len) > l = len; > - section = phys_page_find(page >> TARGET_PAGE_BITS); > + qemu_mutex_lock(&mem_map_lock); > + safe_ref = phys_page_lookup(page, &obj_mrs); > + qemu_mutex_unlock(&mem_map_lock); > + if (safe_ref == 0) { > + qemu_mutex_lock_iothread(); > + qemu_mutex_lock(&mem_map_lock); > + /* At the 2nd try, mem map can change, so need to judge it again */ > + safe_ref = phys_page_lookup(page, &obj_mrs); > + qemu_mutex_unlock(&mem_map_lock); > + if (safe_ref > 0) { > + qemu_mutex_unlock_iothread(); > + } > + } > + section = &obj_mrs; > > if (is_write) { > if (!memory_region_is_ram(section->mr)) { > @@ -3491,10 +3581,16 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, > qemu_put_ram_ptr(ptr); > } > } > + > + memory_region_section_unref(&obj_mrs); The mapping cannot change from not-referenced to reference-counted while we were dispatching? 
I mean the case where we found not ref callback on entry and took the big lock, but now there is an unref callback. > len -= l; > buf += l; > addr += l; > + if (safe_ref == 0) { > + qemu_mutex_unlock_iothread(); > + } > } > + > } > > /* used for ROM loading : can write in RAM and ROM */ > @@ -3504,14 +3600,18 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr, > int l; > uint8_t *ptr; > target_phys_addr_t page; > - MemoryRegionSection *section; > + MemoryRegionSection *section, mr_obj; > > while (len > 0) { > page = addr & TARGET_PAGE_MASK; > l = (page + TARGET_PAGE_SIZE) - addr; > if (l > len) > l = len; > - section = phys_page_find(page >> TARGET_PAGE_BITS); > + > + qemu_mutex_lock(&mem_map_lock); > + phys_page_lookup(page, &mr_obj); > + qemu_mutex_unlock(&mem_map_lock); > + section = &mr_obj; But here we don't care about the return code of phys_page_lookup and all related topics? Because we assume the BQL is held? Reminds me that we will need some support for assert(qemu_mutex_is_locked(&lock)). 
> > if (!(memory_region_is_ram(section->mr) || > memory_region_is_romd(section->mr))) { > @@ -3528,6 +3628,7 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr, > len -= l; > buf += l; > addr += l; > + memory_region_section_unref(&mr_obj); > } > } > > @@ -3592,7 +3693,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, > target_phys_addr_t todo = 0; > int l; > target_phys_addr_t page; > - MemoryRegionSection *section; > + MemoryRegionSection *section, mr_obj; > ram_addr_t raddr = RAM_ADDR_MAX; > ram_addr_t rlen; > void *ret; > @@ -3602,7 +3703,10 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, > l = (page + TARGET_PAGE_SIZE) - addr; > if (l > len) > l = len; > - section = phys_page_find(page >> TARGET_PAGE_BITS); > + qemu_mutex_lock(&mem_map_lock); > + phys_page_lookup(page, &mr_obj); > + qemu_mutex_unlock(&mem_map_lock); > + section = &mr_obj; > > if (!(memory_region_is_ram(section->mr) && !section->readonly)) { > if (todo || bounce.buffer) { > @@ -3616,6 +3720,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, > } > > *plen = l; > + memory_region_section_unref(&mr_obj); > return bounce.buffer; > } > if (!todo) { > @@ -3630,6 +3735,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, > rlen = todo; > ret = qemu_ram_ptr_length(raddr, &rlen); > *plen = rlen; > + memory_region_section_unref(&mr_obj); > return ret; > } > > @@ -4239,9 +4345,12 @@ bool virtio_is_big_endian(void) > #ifndef CONFIG_USER_ONLY > bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr) > { > - MemoryRegionSection *section; > + MemoryRegionSection *section, mr_obj; > > - section = phys_page_find(phys_addr >> TARGET_PAGE_BITS); > + qemu_mutex_lock(&mem_map_lock); > + phys_page_lookup(phys_addr, &mr_obj); > + qemu_mutex_unlock(&mem_map_lock); > + section = &mr_obj; Err, no unref needed here? > > return !(memory_region_is_ram(section->mr) || > memory_region_is_romd(section->mr)); > Jan
On 10/23/2012 02:12 PM, Jan Kiszka wrote: > On 2012-10-22 11:23, Liu Ping Fan wrote: >> Without biglock, we try to protect the mr by increase refcnt. >> If we can inc refcnt, go backward and resort to biglock. >> >> Another point is memory radix-tree can be flushed by another >> thread, so we should get the copy of terminal mr to survive >> from such issue. >> >> + >> void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, >> int len, int is_write) >> { >> @@ -3413,14 +3489,28 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, >> uint8_t *ptr; >> uint32_t val; >> target_phys_addr_t page; >> - MemoryRegionSection *section; >> + MemoryRegionSection *section, obj_mrs; >> + int safe_ref; >> >> while (len > 0) { >> page = addr & TARGET_PAGE_MASK; >> l = (page + TARGET_PAGE_SIZE) - addr; >> if (l > len) >> l = len; >> - section = phys_page_find(page >> TARGET_PAGE_BITS); >> + qemu_mutex_lock(&mem_map_lock); >> + safe_ref = phys_page_lookup(page, &obj_mrs); >> + qemu_mutex_unlock(&mem_map_lock); >> + if (safe_ref == 0) { >> + qemu_mutex_lock_iothread(); >> + qemu_mutex_lock(&mem_map_lock); >> + /* At the 2nd try, mem map can change, so need to judge it again */ >> + safe_ref = phys_page_lookup(page, &obj_mrs); >> + qemu_mutex_unlock(&mem_map_lock); >> + if (safe_ref > 0) { >> + qemu_mutex_unlock_iothread(); >> + } >> + } >> + section = &obj_mrs; >> >> if (is_write) { >> if (!memory_region_is_ram(section->mr)) { >> @@ -3491,10 +3581,16 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, >> qemu_put_ram_ptr(ptr); >> } >> } >> + >> + memory_region_section_unref(&obj_mrs); > > The mapping cannot change from not-referenced to reference-counted while > we were dispatching? I mean the case where we found not ref callback on > entry and took the big lock, but now there is an unref callback. We drop the big lock in that case, so we end up in the same situation. 
> >> len -= l; >> buf += l; >> addr += l; >> + if (safe_ref == 0) { >> + qemu_mutex_unlock_iothread(); >> + } >> } >> + >> } >> >> /* used for ROM loading : can write in RAM and ROM */ >> @@ -3504,14 +3600,18 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr, >> int l; >> uint8_t *ptr; >> target_phys_addr_t page; >> - MemoryRegionSection *section; >> + MemoryRegionSection *section, mr_obj; >> >> while (len > 0) { >> page = addr & TARGET_PAGE_MASK; >> l = (page + TARGET_PAGE_SIZE) - addr; >> if (l > len) >> l = len; >> - section = phys_page_find(page >> TARGET_PAGE_BITS); >> + >> + qemu_mutex_lock(&mem_map_lock); >> + phys_page_lookup(page, &mr_obj); >> + qemu_mutex_unlock(&mem_map_lock); >> + section = &mr_obj; > > But here we don't care about the return code of phys_page_lookup and all > related topics? Because we assume the BQL is held? Reminds me that we > will need some support for assert(qemu_mutex_is_locked(&lock)). I guess it's better to drop that assumption than to have asymmetric APIs. >> >> @@ -4239,9 +4345,12 @@ bool virtio_is_big_endian(void) >> #ifndef CONFIG_USER_ONLY >> bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr) >> { >> - MemoryRegionSection *section; >> + MemoryRegionSection *section, mr_obj; >> >> - section = phys_page_find(phys_addr >> TARGET_PAGE_BITS); >> + qemu_mutex_lock(&mem_map_lock); >> + phys_page_lookup(phys_addr, &mr_obj); >> + qemu_mutex_unlock(&mem_map_lock); >> + section = &mr_obj; > > Err, no unref needed here? Need _ref in the name to remind reviewers that it leaves the refcount unbalanced.
On Tue, Oct 23, 2012 at 8:36 PM, Avi Kivity <avi@redhat.com> wrote: > On 10/23/2012 02:12 PM, Jan Kiszka wrote: >> On 2012-10-22 11:23, Liu Ping Fan wrote: >>> Without biglock, we try to protect the mr by increase refcnt. >>> If we can inc refcnt, go backward and resort to biglock. >>> >>> Another point is memory radix-tree can be flushed by another >>> thread, so we should get the copy of terminal mr to survive >>> from such issue. >>> >>> + >>> void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, >>> int len, int is_write) >>> { >>> @@ -3413,14 +3489,28 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, >>> uint8_t *ptr; >>> uint32_t val; >>> target_phys_addr_t page; >>> - MemoryRegionSection *section; >>> + MemoryRegionSection *section, obj_mrs; >>> + int safe_ref; >>> >>> while (len > 0) { >>> page = addr & TARGET_PAGE_MASK; >>> l = (page + TARGET_PAGE_SIZE) - addr; >>> if (l > len) >>> l = len; >>> - section = phys_page_find(page >> TARGET_PAGE_BITS); >>> + qemu_mutex_lock(&mem_map_lock); >>> + safe_ref = phys_page_lookup(page, &obj_mrs); >>> + qemu_mutex_unlock(&mem_map_lock); >>> + if (safe_ref == 0) { >>> + qemu_mutex_lock_iothread(); >>> + qemu_mutex_lock(&mem_map_lock); >>> + /* At the 2nd try, mem map can change, so need to judge it again */ >>> + safe_ref = phys_page_lookup(page, &obj_mrs); >>> + qemu_mutex_unlock(&mem_map_lock); >>> + if (safe_ref > 0) { >>> + qemu_mutex_unlock_iothread(); >>> + } >>> + } >>> + section = &obj_mrs; >>> >>> if (is_write) { >>> if (!memory_region_is_ram(section->mr)) { >>> @@ -3491,10 +3581,16 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, >>> qemu_put_ram_ptr(ptr); >>> } >>> } >>> + >>> + memory_region_section_unref(&obj_mrs); >> >> The mapping cannot change from not-referenced to reference-counted while >> we were dispatching? I mean the case where we found not ref callback on >> entry and took the big lock, but now there is an unref callback. 
> > We drop the big lock in that case, so we end up in the same situation. > >> >>> len -= l; >>> buf += l; >>> addr += l; >>> + if (safe_ref == 0) { >>> + qemu_mutex_unlock_iothread(); >>> + } >>> } >>> + >>> } >>> >>> /* used for ROM loading : can write in RAM and ROM */ >>> @@ -3504,14 +3600,18 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr, >>> int l; >>> uint8_t *ptr; >>> target_phys_addr_t page; >>> - MemoryRegionSection *section; >>> + MemoryRegionSection *section, mr_obj; >>> >>> while (len > 0) { >>> page = addr & TARGET_PAGE_MASK; >>> l = (page + TARGET_PAGE_SIZE) - addr; >>> if (l > len) >>> l = len; >>> - section = phys_page_find(page >> TARGET_PAGE_BITS); >>> + >>> + qemu_mutex_lock(&mem_map_lock); >>> + phys_page_lookup(page, &mr_obj); >>> + qemu_mutex_unlock(&mem_map_lock); >>> + section = &mr_obj; >> >> But here we don't care about the return code of phys_page_lookup and all >> related topics? Because we assume the BQL is held? Reminds me that we >> will need some support for assert(qemu_mutex_is_locked(&lock)). > > I guess it's better to drop that assumption than to have asymmetric APIs. > Yes, now the updater of physmap based on mem_map_lock, and the same it will be for readers. >>> >>> @@ -4239,9 +4345,12 @@ bool virtio_is_big_endian(void) >>> #ifndef CONFIG_USER_ONLY >>> bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr) >>> { >>> - MemoryRegionSection *section; >>> + MemoryRegionSection *section, mr_obj; >>> >>> - section = phys_page_find(phys_addr >> TARGET_PAGE_BITS); >>> + qemu_mutex_lock(&mem_map_lock); >>> + phys_page_lookup(phys_addr, &mr_obj); >>> + qemu_mutex_unlock(&mem_map_lock); >>> + section = &mr_obj; >> >> Err, no unref needed here? > > Need _ref in the name to remind reviewers that it leaves the refcount > unbalanced. > Oh, here is a bug, need unref. 
As to unbalanced refcount, it will be adopted for virtio-blk listener (not implement in this patchset) Regards, pingfan > -- > error compiling committee.c: too many arguments to function >
On Wed, Oct 24, 2012 at 2:31 PM, liu ping fan <qemulist@gmail.com> wrote: > On Tue, Oct 23, 2012 at 8:36 PM, Avi Kivity <avi@redhat.com> wrote: >> On 10/23/2012 02:12 PM, Jan Kiszka wrote: >>> On 2012-10-22 11:23, Liu Ping Fan wrote: >>>> Without biglock, we try to protect the mr by increase refcnt. >>>> If we can inc refcnt, go backward and resort to biglock. >>>> >>>> Another point is memory radix-tree can be flushed by another >>>> thread, so we should get the copy of terminal mr to survive >>>> from such issue. >>>> >>>> + >>>> void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, >>>> int len, int is_write) >>>> { >>>> @@ -3413,14 +3489,28 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, >>>> uint8_t *ptr; >>>> uint32_t val; >>>> target_phys_addr_t page; >>>> - MemoryRegionSection *section; >>>> + MemoryRegionSection *section, obj_mrs; >>>> + int safe_ref; >>>> >>>> while (len > 0) { >>>> page = addr & TARGET_PAGE_MASK; >>>> l = (page + TARGET_PAGE_SIZE) - addr; >>>> if (l > len) >>>> l = len; >>>> - section = phys_page_find(page >> TARGET_PAGE_BITS); >>>> + qemu_mutex_lock(&mem_map_lock); >>>> + safe_ref = phys_page_lookup(page, &obj_mrs); >>>> + qemu_mutex_unlock(&mem_map_lock); >>>> + if (safe_ref == 0) { >>>> + qemu_mutex_lock_iothread(); >>>> + qemu_mutex_lock(&mem_map_lock); >>>> + /* At the 2nd try, mem map can change, so need to judge it again */ >>>> + safe_ref = phys_page_lookup(page, &obj_mrs); >>>> + qemu_mutex_unlock(&mem_map_lock); >>>> + if (safe_ref > 0) { >>>> + qemu_mutex_unlock_iothread(); >>>> + } >>>> + } >>>> + section = &obj_mrs; >>>> >>>> if (is_write) { >>>> if (!memory_region_is_ram(section->mr)) { >>>> @@ -3491,10 +3581,16 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, >>>> qemu_put_ram_ptr(ptr); >>>> } >>>> } >>>> + >>>> + memory_region_section_unref(&obj_mrs); >>> >>> The mapping cannot change from not-referenced to reference-counted while >>> we were dispatching? 
I mean the case where we found not ref callback on >>> entry and took the big lock, but now there is an unref callback. >> >> We drop the big lock in that case, so we end up in the same situation. >> >>> >>>> len -= l; >>>> buf += l; >>>> addr += l; >>>> + if (safe_ref == 0) { >>>> + qemu_mutex_unlock_iothread(); >>>> + } >>>> } >>>> + >>>> } >>>> >>>> /* used for ROM loading : can write in RAM and ROM */ >>>> @@ -3504,14 +3600,18 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr, >>>> int l; >>>> uint8_t *ptr; >>>> target_phys_addr_t page; >>>> - MemoryRegionSection *section; >>>> + MemoryRegionSection *section, mr_obj; >>>> >>>> while (len > 0) { >>>> page = addr & TARGET_PAGE_MASK; >>>> l = (page + TARGET_PAGE_SIZE) - addr; >>>> if (l > len) >>>> l = len; >>>> - section = phys_page_find(page >> TARGET_PAGE_BITS); >>>> + >>>> + qemu_mutex_lock(&mem_map_lock); >>>> + phys_page_lookup(page, &mr_obj); >>>> + qemu_mutex_unlock(&mem_map_lock); >>>> + section = &mr_obj; >>> >>> But here we don't care about the return code of phys_page_lookup and all >>> related topics? Because we assume the BQL is held? Reminds me that we >>> will need some support for assert(qemu_mutex_is_locked(&lock)). >> >> I guess it's better to drop that assumption than to have asymmetric APIs. >> > Yes, now the updater of physmap based on mem_map_lock, and the same it > will be for readers. >>>> >>>> @@ -4239,9 +4345,12 @@ bool virtio_is_big_endian(void) >>>> #ifndef CONFIG_USER_ONLY >>>> bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr) >>>> { >>>> - MemoryRegionSection *section; >>>> + MemoryRegionSection *section, mr_obj; >>>> >>>> - section = phys_page_find(phys_addr >> TARGET_PAGE_BITS); >>>> + qemu_mutex_lock(&mem_map_lock); >>>> + phys_page_lookup(phys_addr, &mr_obj); >>>> + qemu_mutex_unlock(&mem_map_lock); >>>> + section = &mr_obj; >>> >>> Err, no unref needed here? >> >> Need _ref in the name to remind reviewers that it leaves the refcount >> unbalanced. 
>> > Oh, here is a bug, need unref. As to unbalanced refcount, it will be > adopted for virtio-blk listener (not implement in this patchset) > It is like cpu_physical_memory_map/unmap, the map will hold the unbalanced ref, and unmap release it. > Regards, > pingfan >> -- >> error compiling committee.c: too many arguments to function >>
On 10/24/2012 08:56 AM, liu ping fan wrote: >>> >> Oh, here is a bug, need unref. As to unbalanced refcount, it will be >> adopted for virtio-blk listener (not implement in this patchset) >> > It is like cpu_physical_memory_map/unmap, the map will hold the > unbalanced ref, and unmap release it. Those APIs have symmetric names. map/unmap, ref/unref, lock/unlock. But here we have lookup and no unlookup.
diff --git a/exec.c b/exec.c index 5834766..91b859b 100644 --- a/exec.c +++ b/exec.c @@ -200,6 +200,8 @@ struct PhysPageEntry { uint16_t ptr : 15; }; +static QemuMutex mem_map_lock; + /* Simple allocator for PhysPageEntry nodes */ static PhysPageEntry (*phys_map_nodes)[L2_SIZE]; static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc; @@ -212,6 +214,8 @@ static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 }; static void io_mem_init(void); static void memory_map_init(void); +static int phys_page_lookup(target_phys_addr_t addr, MemoryRegionSection *mrs); + static MemoryRegion io_mem_watch; #endif @@ -2245,6 +2249,7 @@ static void register_subpage(MemoryRegionSection *section) subpage_t *subpage; target_phys_addr_t base = section->offset_within_address_space & TARGET_PAGE_MASK; + /* Already under the protection of mem_map_lock */ MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS); MemoryRegionSection subsection = { .offset_within_address_space = base, @@ -3165,6 +3170,8 @@ static void io_mem_init(void) static void core_begin(MemoryListener *listener) { + /* protect the updating process of mrs in memory core agaist readers */ + qemu_mutex_lock(&mem_map_lock); destroy_all_mappings(); phys_sections_clear(); phys_map.ptr = PHYS_MAP_NODE_NIL; @@ -3184,17 +3191,32 @@ static void core_commit(MemoryListener *listener) for(env = first_cpu; env != NULL; env = env->next_cpu) { tlb_flush(env, 1); } + qemu_mutex_unlock(&mem_map_lock); } static void core_region_add(MemoryListener *listener, MemoryRegionSection *section) { + MemoryRegion *mr = section->mr; + + if (mr->ops) { + if (mr->ops->ref) { + mr->ops->ref(mr); + } + } cpu_register_physical_memory_log(section, section->readonly); } static void core_region_del(MemoryListener *listener, MemoryRegionSection *section) { + MemoryRegion *mr = section->mr; + + if (mr->ops) { + if (mr->ops->unref) { + mr->ops->unref(mr); + } + } } static void core_region_nop(MemoryListener *listener, @@ 
-3348,6 +3370,8 @@ static void memory_map_init(void) memory_region_init(system_io, "io", 65536); set_system_io_map(system_io); + qemu_mutex_init(&mem_map_lock); + memory_listener_register(&core_memory_listener, system_memory); memory_listener_register(&io_memory_listener, system_io); } @@ -3406,6 +3430,58 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr, } #else + +static MemoryRegionSection *subpage_get_terminal(subpage_t *mmio, + target_phys_addr_t addr) +{ + MemoryRegionSection *section; + unsigned int idx = SUBPAGE_IDX(addr); + + section = &phys_sections[mmio->sub_section[idx]]; + return section; +} + +static int memory_region_section_ref(MemoryRegionSection *mrs) +{ + MemoryRegion *mr; + int ret = 0; + + mr = mrs->mr; + if (mr->ops) { + if (mr->ops->ref) { + ret = mr->ops->ref(mr); + } + } + return ret; +} + +static void memory_region_section_unref(MemoryRegionSection *mrs) +{ + MemoryRegion *mr; + + mr = mrs->mr; + if (mr->ops) { + if (mr->ops->unref) { + mr->ops->unref(mr); + } + } +} + +static int phys_page_lookup(target_phys_addr_t addr, MemoryRegionSection *mrs) +{ + MemoryRegionSection *section; + int ret; + + section = phys_page_find(addr >> TARGET_PAGE_BITS); + if (section->mr->subpage) { + section = subpage_get_terminal(section->mr->opaque, addr); + } + *mrs = *section; + ret = memory_region_section_ref(mrs); + + return ret; +} + void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, int len, int is_write) { @@ -3413,14 +3489,28 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, uint8_t *ptr; uint32_t val; target_phys_addr_t page; - MemoryRegionSection *section; + MemoryRegionSection *section, obj_mrs; + int safe_ref; while (len > 0) { page = addr & TARGET_PAGE_MASK; l = (page + TARGET_PAGE_SIZE) - addr; if (l > len) l = len; - section = phys_page_find(page >> TARGET_PAGE_BITS); + qemu_mutex_lock(&mem_map_lock); + safe_ref = phys_page_lookup(page, &obj_mrs); + qemu_mutex_unlock(&mem_map_lock); + if 
(safe_ref == 0) { + qemu_mutex_lock_iothread(); + qemu_mutex_lock(&mem_map_lock); + /* At the 2nd try, mem map can change, so need to judge it again */ + safe_ref = phys_page_lookup(page, &obj_mrs); + qemu_mutex_unlock(&mem_map_lock); + if (safe_ref > 0) { + qemu_mutex_unlock_iothread(); + } + } + section = &obj_mrs; if (is_write) { if (!memory_region_is_ram(section->mr)) { @@ -3491,10 +3581,16 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, qemu_put_ram_ptr(ptr); } } + + memory_region_section_unref(&obj_mrs); len -= l; buf += l; addr += l; + if (safe_ref == 0) { + qemu_mutex_unlock_iothread(); + } } + } /* used for ROM loading : can write in RAM and ROM */ @@ -3504,14 +3600,18 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr, int l; uint8_t *ptr; target_phys_addr_t page; - MemoryRegionSection *section; + MemoryRegionSection *section, mr_obj; while (len > 0) { page = addr & TARGET_PAGE_MASK; l = (page + TARGET_PAGE_SIZE) - addr; if (l > len) l = len; - section = phys_page_find(page >> TARGET_PAGE_BITS); + + qemu_mutex_lock(&mem_map_lock); + phys_page_lookup(page, &mr_obj); + qemu_mutex_unlock(&mem_map_lock); + section = &mr_obj; if (!(memory_region_is_ram(section->mr) || memory_region_is_romd(section->mr))) { @@ -3528,6 +3628,7 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr, len -= l; buf += l; addr += l; + memory_region_section_unref(&mr_obj); } } @@ -3592,7 +3693,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, target_phys_addr_t todo = 0; int l; target_phys_addr_t page; - MemoryRegionSection *section; + MemoryRegionSection *section, mr_obj; ram_addr_t raddr = RAM_ADDR_MAX; ram_addr_t rlen; void *ret; @@ -3602,7 +3703,10 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, l = (page + TARGET_PAGE_SIZE) - addr; if (l > len) l = len; - section = phys_page_find(page >> TARGET_PAGE_BITS); + qemu_mutex_lock(&mem_map_lock); + phys_page_lookup(page, &mr_obj); + qemu_mutex_unlock(&mem_map_lock); + section 
= &mr_obj; if (!(memory_region_is_ram(section->mr) && !section->readonly)) { if (todo || bounce.buffer) { @@ -3616,6 +3720,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, } *plen = l; + memory_region_section_unref(&mr_obj); return bounce.buffer; } if (!todo) { @@ -3630,6 +3735,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, rlen = todo; ret = qemu_ram_ptr_length(raddr, &rlen); *plen = rlen; + memory_region_section_unref(&mr_obj); return ret; } @@ -4239,9 +4345,12 @@ bool virtio_is_big_endian(void) #ifndef CONFIG_USER_ONLY bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr) { - MemoryRegionSection *section; + MemoryRegionSection *section, mr_obj; - section = phys_page_find(phys_addr >> TARGET_PAGE_BITS); + qemu_mutex_lock(&mem_map_lock); + phys_page_lookup(phys_addr, &mr_obj); + qemu_mutex_unlock(&mem_map_lock); + section = &mr_obj; return !(memory_region_is_ram(section->mr) || memory_region_is_romd(section->mr));
Without biglock, we try to protect the mr by increasing its refcnt. If we can not inc the refcnt, go backward and resort to biglock. Another point is that the memory radix-tree can be flushed by another thread, so we should get a copy of the terminal mr to survive such an issue. Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com> --- exec.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 117 insertions(+), 8 deletions(-)