Patchwork [v5,6/8] memory: make mmio dispatch able to be out of biglock

login
register
mail settings
Submitter pingfank@linux.vnet.ibm.com
Date Oct. 28, 2012, 11:48 p.m.
Message ID <1351468127-15025-7-git-send-email-pingfank@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/194770/
State New
Headers show

Comments

pingfank@linux.vnet.ibm.com - Oct. 28, 2012, 11:48 p.m.
Without biglock, we try to protect the mr by increase refcnt.
If we can inc refcnt, go backward and resort to biglock.

Another point is memory radix-tree can be flushed by another
thread, so we should get the copy of terminal mr to survive
from such issue.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 docs/memory.txt   |    4 +
 exec.c            |  170 +++++++++++++++++++++++++++++++++++++++++++++++-----
 hw/pci.c          |    3 +-
 memory-internal.h |    1 +
 memory.h          |    2 +
 5 files changed, 162 insertions(+), 18 deletions(-)
Avi Kivity - Oct. 29, 2012, 9:41 a.m.
On 10/29/2012 01:48 AM, Liu Ping Fan wrote:
> Without biglock, we try to protect the mr by increase refcnt.
> If we can inc refcnt, go backward and resort to biglock.
> 
> Another point is memory radix-tree can be flushed by another
> thread, so we should get the copy of terminal mr to survive
> from such issue.
> 

> +static bool memory_region_section_ref(MemoryRegionSection *mrs)
> +{
> +    MemoryRegion *mr;
> +    bool ret = false;
> +
> +    mr = mrs->mr;
> +    if (mr->ops && mr->ops->ref) {
> +        ret = mr->ops->ref(mr);

I still don't see why ->ref() needs to return something.

> +    }
> +    return ret;
> +}
> +
>  
>      while (len > 0) {
>          page = addr & TARGET_PAGE_MASK;
>          l = (page + TARGET_PAGE_SIZE) - addr;
>          if (l > len)
>              l = len;
> -        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
> +
> +        if (as->lock) {
> +            qemu_mutex_lock(as->lock);
> +            safe_ref = memory_region_section_lookup_ref(d, page, &obj_mrs);
> +            qemu_mutex_unlock(as->lock);
> +            if (!safe_ref) {
> +                qemu_mutex_lock_iothread();
> +                qemu_mutex_lock(as->lock);
> +                /* when 2nd try, mem map can change, need to judge it again */
> +                safe_ref = memory_region_section_lookup_ref(d, page, &obj_mrs);
> +                qemu_mutex_unlock(as->lock);
> +                if (safe_ref) {
> +                    qemu_mutex_unlock_iothread();
> +                }
> +            }
> +        } else {
> +            /* Caller hold the big lock */
> +            memory_region_section_lookup_ref(d, page, &obj_mrs);

It's not a property of the address space, it's a property of the caller.

> +        }
> +        section = &obj_mrs;
>  
>          if (is_write) {
>              if (!memory_region_is_ram(section->mr)) {
pingfan liu - Oct. 30, 2012, 7:06 a.m.
On Mon, Oct 29, 2012 at 5:41 PM, Avi Kivity <avi@redhat.com> wrote:
> On 10/29/2012 01:48 AM, Liu Ping Fan wrote:
>> Without biglock, we try to protect the mr by increase refcnt.
>> If we can inc refcnt, go backward and resort to biglock.
>>
>> Another point is memory radix-tree can be flushed by another
>> thread, so we should get the copy of terminal mr to survive
>> from such issue.
>>
>
>> +static bool memory_region_section_ref(MemoryRegionSection *mrs)
>> +{
>> +    MemoryRegion *mr;
>> +    bool ret = false;
>> +
>> +    mr = mrs->mr;
>> +    if (mr->ops && mr->ops->ref) {
>> +        ret = mr->ops->ref(mr);
>
> I still don't see why ->ref() needs to return something.
>
My original design use it to trace refcnt on object, but now it
abandons.  Will drop it.
>> +    }
>> +    return ret;
>> +}
>> +
>>
>>      while (len > 0) {
>>          page = addr & TARGET_PAGE_MASK;
>>          l = (page + TARGET_PAGE_SIZE) - addr;
>>          if (l > len)
>>              l = len;
>> -        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
>> +
>> +        if (as->lock) {
>> +            qemu_mutex_lock(as->lock);
>> +            safe_ref = memory_region_section_lookup_ref(d, page, &obj_mrs);
>> +            qemu_mutex_unlock(as->lock);
>> +            if (!safe_ref) {
>> +                qemu_mutex_lock_iothread();
>> +                qemu_mutex_lock(as->lock);
>> +                /* when 2nd try, mem map can change, need to judge it again */
>> +                safe_ref = memory_region_section_lookup_ref(d, page, &obj_mrs);
>> +                qemu_mutex_unlock(as->lock);
>> +                if (safe_ref) {
>> +                    qemu_mutex_unlock_iothread();
>> +                }
>> +            }
>> +        } else {
>> +            /* Caller hold the big lock */
>> +            memory_region_section_lookup_ref(d, page, &obj_mrs);
>
> It's not a property of the address space, it's a property of the caller.
>
Sorry, what is your meaning?

>> +        }
>> +        section = &obj_mrs;
>>
>>          if (is_write) {
>>              if (!memory_region_is_ram(section->mr)) {
>
>
> --
> error compiling committee.c: too many arguments to function
>
pingfan liu - Nov. 1, 2012, 2:04 a.m.
On Tue, Oct 30, 2012 at 3:06 PM, liu ping fan <qemulist@gmail.com> wrote:
> On Mon, Oct 29, 2012 at 5:41 PM, Avi Kivity <avi@redhat.com> wrote:
>> On 10/29/2012 01:48 AM, Liu Ping Fan wrote:
>>> Without biglock, we try to protect the mr by increase refcnt.
>>> If we can inc refcnt, go backward and resort to biglock.
>>>
>>> Another point is memory radix-tree can be flushed by another
>>> thread, so we should get the copy of terminal mr to survive
>>> from such issue.
>>>
>>
>>> +static bool memory_region_section_ref(MemoryRegionSection *mrs)
>>> +{
>>> +    MemoryRegion *mr;
>>> +    bool ret = false;
>>> +
>>> +    mr = mrs->mr;
>>> +    if (mr->ops && mr->ops->ref) {
>>> +        ret = mr->ops->ref(mr);
>>
>> I still don't see why ->ref() needs to return something.
>>
> My original design use it to trace refcnt on object, but now it
> abandons.  Will drop it.
>>> +    }
>>> +    return ret;
>>> +}
>>> +
>>>
>>>      while (len > 0) {
>>>          page = addr & TARGET_PAGE_MASK;
>>>          l = (page + TARGET_PAGE_SIZE) - addr;
>>>          if (l > len)
>>>              l = len;
>>> -        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
>>> +
>>> +        if (as->lock) {
>>> +            qemu_mutex_lock(as->lock);
>>> +            safe_ref = memory_region_section_lookup_ref(d, page, &obj_mrs);
>>> +            qemu_mutex_unlock(as->lock);
>>> +            if (!safe_ref) {
>>> +                qemu_mutex_lock_iothread();
>>> +                qemu_mutex_lock(as->lock);
>>> +                /* when 2nd try, mem map can change, need to judge it again */
>>> +                safe_ref = memory_region_section_lookup_ref(d, page, &obj_mrs);
>>> +                qemu_mutex_unlock(as->lock);
>>> +                if (safe_ref) {
>>> +                    qemu_mutex_unlock_iothread();
>>> +                }
>>> +            }
>>> +        } else {
>>> +            /* Caller hold the big lock */
>>> +            memory_region_section_lookup_ref(d, page, &obj_mrs);
>>
>> It's not a property of the address space, it's a property of the caller.
>>
> Sorry, what is your meaning?
>
ping?

>>> +        }
>>> +        section = &obj_mrs;
>>>
>>>          if (is_write) {
>>>              if (!memory_region_is_ram(section->mr)) {
>>
>>
>> --
>> error compiling committee.c: too many arguments to function
>>
Avi Kivity - Nov. 1, 2012, 3:46 p.m.
On 10/30/2012 09:06 AM, liu ping fan wrote:
>>>      while (len > 0) {
>>>          page = addr & TARGET_PAGE_MASK;
>>>          l = (page + TARGET_PAGE_SIZE) - addr;
>>>          if (l > len)
>>>              l = len;
>>> -        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
>>> +
>>> +        if (as->lock) {
>>> +            qemu_mutex_lock(as->lock);
>>> +            safe_ref = memory_region_section_lookup_ref(d, page, &obj_mrs);
>>> +            qemu_mutex_unlock(as->lock);
>>> +            if (!safe_ref) {
>>> +                qemu_mutex_lock_iothread();
>>> +                qemu_mutex_lock(as->lock);
>>> +                /* when 2nd try, mem map can change, need to judge it again */
>>> +                safe_ref = memory_region_section_lookup_ref(d, page, &obj_mrs);
>>> +                qemu_mutex_unlock(as->lock);
>>> +                if (safe_ref) {
>>> +                    qemu_mutex_unlock_iothread();
>>> +                }
>>> +            }
>>> +        } else {
>>> +            /* Caller hold the big lock */
>>> +            memory_region_section_lookup_ref(d, page, &obj_mrs);
>>
>> It's not a property of the address space, it's a property of the caller.
>>
> Sorry, what is your meaning?

Whether the caller holds the big lock or not is not known by the address
space.  It is only known by the caller itself.

Patch

diff --git a/docs/memory.txt b/docs/memory.txt
index 5bbee8e..6b3d15a 100644
--- a/docs/memory.txt
+++ b/docs/memory.txt
@@ -170,3 +170,7 @@  various constraints can be supplied to control how these callbacks are called:
  - .old_portio and .old_mmio can be used to ease porting from code using
    cpu_register_io_memory() and register_ioport().  They should not be used
    in new code.
+
+MMIO regions are provided with ->ref() and ->unref() callbacks; This pair callbacks
+are optional. When ref() return non-zero, Both MemoryRegion and its opaque are
+safe to use.
diff --git a/exec.c b/exec.c
index 750008c..46da08c 100644
--- a/exec.c
+++ b/exec.c
@@ -2280,7 +2280,7 @@  static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *sec
                   section_index);
 }
 
-static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
+static void mem_nop(MemoryListener *listener, MemoryRegionSection *section)
 {
     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
     MemoryRegionSection now = *section, remain = *section;
@@ -2314,6 +2314,26 @@  static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
     }
 }
 
+static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
+{
+    MemoryRegion *mr = section->mr;
+
+    if (mr->ops && mr->ops->ref) {
+        mr->ops->ref(mr);
+    }
+    mem_nop(listener, section);
+}
+
+static void mem_del(MemoryListener *listener,
+                            MemoryRegionSection *section)
+{
+    MemoryRegion *mr = section->mr;
+
+    if (mr->ops && mr->ops->unref) {
+        mr->ops->unref(mr);
+    }
+}
+
 void qemu_flush_coalesced_mmio_buffer(void)
 {
     if (kvm_enabled())
@@ -3165,11 +3185,27 @@  static void io_mem_init(void)
 static void mem_begin(MemoryListener *listener)
 {
     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
+    AddressSpace *as = d->as;
 
+    /* protect the updating process of mrs in memory core agaist readers */
+    if (as->lock) {
+        qemu_mutex_lock(as->lock);
+    }
     destroy_all_mappings(d);
     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
 }
 
+static void mem_commit(MemoryListener *listener)
+{
+    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch,
+                listener);
+    AddressSpace *as = d->as;
+
+    if (as->lock) {
+        qemu_mutex_unlock(as->lock);
+    }
+}
+
 static void core_begin(MemoryListener *listener)
 {
     phys_sections_clear();
@@ -3243,11 +3279,14 @@  void address_space_init_dispatch(AddressSpace *as)
     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
     d->listener = (MemoryListener) {
         .begin = mem_begin,
+        .commit = mem_commit,
         .region_add = mem_add,
-        .region_nop = mem_add,
+        .region_del = mem_del,
+        .region_nop = mem_nop,
         .priority = 0,
     };
     as->dispatch = d;
+    as->dispatch->as = as;
     memory_listener_register(&d->listener, as);
 }
 
@@ -3265,12 +3304,12 @@  static void memory_map_init(void)
 {
     system_memory = g_malloc(sizeof(*system_memory));
     memory_region_init(system_memory, "system", INT64_MAX);
-    address_space_init(&address_space_memory, system_memory);
+    address_space_init(&address_space_memory, system_memory, true);
     address_space_memory.name = "memory";
 
     system_io = g_malloc(sizeof(*system_io));
     memory_region_init(system_io, "io", 65536);
-    address_space_init(&address_space_io, system_io);
+    address_space_init(&address_space_io, system_io, false);
     address_space_io.name = "I/O";
 
     memory_listener_register(&core_memory_listener, &address_space_memory);
@@ -3345,6 +3384,71 @@  static void invalidate_and_set_dirty(target_phys_addr_t addr,
     xen_modified_memory(addr, length);
 }
 
+static MemoryRegionSection *subpage_get_terminal(subpage_t *mmio,
+    target_phys_addr_t addr)
+{
+    MemoryRegionSection *section;
+    unsigned int idx = SUBPAGE_IDX(addr);
+
+    section = &phys_sections[mmio->sub_section[idx]];
+    return section;
+}
+
+static bool memory_region_section_ref(MemoryRegionSection *mrs)
+{
+    MemoryRegion *mr;
+    bool ret = false;
+
+    mr = mrs->mr;
+    if (mr->ops && mr->ops->ref) {
+        ret = mr->ops->ref(mr);
+    }
+    return ret;
+}
+
+static void memory_region_section_unref(MemoryRegionSection *mrs)
+{
+    MemoryRegion *mr;
+
+    mr = mrs->mr;
+    if (mr->ops && mr->ops->unref) {
+        mr->ops->unref(mr);
+    }
+}
+
+static bool memory_region_section_lookup_ref(AddressSpaceDispatch *d,
+    target_phys_addr_t addr, MemoryRegionSection *mrs)
+{
+    MemoryRegionSection *section;
+    bool ret;
+
+    section = phys_page_find(d, addr >> TARGET_PAGE_BITS);
+    if (section->mr->subpage) {
+        section = subpage_get_terminal(section->mr->opaque, addr);
+    }
+    *mrs = *section;
+    ret = memory_region_section_ref(mrs);
+
+    return ret;
+}
+
+static bool address_space_section_lookup_ref(AddressSpace *as,
+    target_phys_addr_t page, MemoryRegionSection *mrs)
+{
+    bool safe_ref;
+    AddressSpaceDispatch *d = as->dispatch;
+
+    if (as->lock) {
+        qemu_mutex_lock(as->lock);
+        safe_ref = memory_region_section_lookup_ref(d, page, mrs);
+        qemu_mutex_unlock(as->lock);
+    } else {
+        safe_ref = memory_region_section_lookup_ref(d, page, mrs);
+    }
+
+    return safe_ref;
+}
+
 void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
                       int len, bool is_write)
 {
@@ -3353,14 +3457,34 @@  void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
     uint8_t *ptr;
     uint32_t val;
     target_phys_addr_t page;
-    MemoryRegionSection *section;
+    bool safe_ref = false;
+    MemoryRegionSection *section, obj_mrs;
 
     while (len > 0) {
         page = addr & TARGET_PAGE_MASK;
         l = (page + TARGET_PAGE_SIZE) - addr;
         if (l > len)
             l = len;
-        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
+
+        if (as->lock) {
+            qemu_mutex_lock(as->lock);
+            safe_ref = memory_region_section_lookup_ref(d, page, &obj_mrs);
+            qemu_mutex_unlock(as->lock);
+            if (!safe_ref) {
+                qemu_mutex_lock_iothread();
+                qemu_mutex_lock(as->lock);
+                /* when 2nd try, mem map can change, need to judge it again */
+                safe_ref = memory_region_section_lookup_ref(d, page, &obj_mrs);
+                qemu_mutex_unlock(as->lock);
+                if (safe_ref) {
+                    qemu_mutex_unlock_iothread();
+                }
+            }
+        } else {
+            /* Caller hold the big lock */
+            memory_region_section_lookup_ref(d, page, &obj_mrs);
+        }
+        section = &obj_mrs;
 
         if (is_write) {
             if (!memory_region_is_ram(section->mr)) {
@@ -3425,9 +3549,13 @@  void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
                 qemu_put_ram_ptr(ptr);
             }
         }
+        memory_region_section_unref(&obj_mrs);
         len -= l;
         buf += l;
         addr += l;
+        if (!safe_ref && as->lock) {
+            qemu_mutex_unlock_iothread();
+        }
     }
 }
 
@@ -3460,18 +3588,19 @@  void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
                                    const uint8_t *buf, int len)
 {
-    AddressSpaceDispatch *d = address_space_memory.dispatch;
     int l;
     uint8_t *ptr;
     target_phys_addr_t page;
-    MemoryRegionSection *section;
+    MemoryRegionSection *section, mr_obj;
 
     while (len > 0) {
         page = addr & TARGET_PAGE_MASK;
         l = (page + TARGET_PAGE_SIZE) - addr;
         if (l > len)
             l = len;
-        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
+        address_space_section_lookup_ref(&address_space_memory,
+                page >> TARGET_PAGE_BITS, &mr_obj);
+        section = &mr_obj;
 
         if (!(memory_region_is_ram(section->mr) ||
               memory_region_is_romd(section->mr))) {
@@ -3489,6 +3618,7 @@  void cpu_physical_memory_write_rom(target_phys_addr_t addr,
         len -= l;
         buf += l;
         addr += l;
+        memory_region_section_unref(&mr_obj);
     }
 }
 
@@ -3550,12 +3680,11 @@  void *address_space_map(AddressSpace *as,
                         target_phys_addr_t *plen,
                         bool is_write)
 {
-    AddressSpaceDispatch *d = as->dispatch;
     target_phys_addr_t len = *plen;
     target_phys_addr_t todo = 0;
     int l;
     target_phys_addr_t page;
-    MemoryRegionSection *section;
+    MemoryRegionSection *section, mr_obj;
     ram_addr_t raddr = RAM_ADDR_MAX;
     ram_addr_t rlen;
     void *ret;
@@ -3565,7 +3694,8 @@  void *address_space_map(AddressSpace *as,
         l = (page + TARGET_PAGE_SIZE) - addr;
         if (l > len)
             l = len;
-        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
+        address_space_section_lookup_ref(as, page >> TARGET_PAGE_BITS, &mr_obj);
+        section = &mr_obj;
 
         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
             if (todo || bounce.buffer) {
@@ -3579,6 +3709,7 @@  void *address_space_map(AddressSpace *as,
             }
 
             *plen = l;
+            memory_region_section_unref(&mr_obj);
             return bounce.buffer;
         }
         if (!todo) {
@@ -3589,6 +3720,7 @@  void *address_space_map(AddressSpace *as,
         len -= l;
         addr += l;
         todo += l;
+        memory_region_section_unref(&mr_obj);
     }
     rlen = todo;
     ret = qemu_ram_ptr_length(raddr, &rlen);
@@ -4197,12 +4329,16 @@  bool virtio_is_big_endian(void)
 #ifndef CONFIG_USER_ONLY
 bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
 {
-    MemoryRegionSection *section;
-
-    section = phys_page_find(address_space_memory.dispatch,
-                             phys_addr >> TARGET_PAGE_BITS);
+    MemoryRegionSection *section, mr_obj;
+    bool ret;
 
-    return !(memory_region_is_ram(section->mr) ||
+    address_space_section_lookup_ref(&address_space_memory,
+                             phys_addr >> TARGET_PAGE_BITS, &mr_obj);
+    section = &mr_obj;
+    ret = !(memory_region_is_ram(section->mr) ||
              memory_region_is_romd(section->mr));
+    memory_region_section_unref(&mr_obj);
+
+    return ret;
 }
 #endif
diff --git a/hw/pci.c b/hw/pci.c
index 9ba589e..64206e6 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -786,7 +786,8 @@  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                                  get_system_memory(), 0,
                                  memory_region_size(get_system_memory()));
         memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
-        address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
+        address_space_init(&pci_dev->bus_master_as,
+                        &pci_dev->bus_master_enable_region, true);
         pci_dev->dma = g_new(DMAContext, 1);
         dma_context_init(pci_dev->dma, &pci_dev->bus_master_as, NULL, NULL, NULL);
     }
diff --git a/memory-internal.h b/memory-internal.h
index b33a99d..ea06bfb 100644
--- a/memory-internal.h
+++ b/memory-internal.h
@@ -38,6 +38,7 @@  struct AddressSpaceDispatch {
      */
     PhysPageEntry phys_map;
     MemoryListener listener;
+    AddressSpace *as;
 };
 
 void address_space_init_dispatch(AddressSpace *as);
diff --git a/memory.h b/memory.h
index 12d1c56..8ca9bcc 100644
--- a/memory.h
+++ b/memory.h
@@ -67,6 +67,8 @@  struct MemoryRegionOps {
                   target_phys_addr_t addr,
                   uint64_t data,
                   unsigned size);
+    bool (*ref)(MemoryRegion *mr);
+    void (*unref)(MemoryRegion *mr);
 
     enum device_endian endianness;
     /* Guest-visible constraints: */