Patchwork [30/30] exec: put address space dispatch under RCU critical section

login
register
mail settings
Submitter Paolo Bonzini
Date June 28, 2013, 6:26 p.m.
Message ID <1372444009-11544-31-git-send-email-pbonzini@redhat.com>
Download mbox | patch
Permalink /patch/255520/
State New
Headers show

Comments

Paolo Bonzini - June 28, 2013, 6:26 p.m.
With this change, address space dispatch can be moved outside the
BQL.  The actual I/O would still have to happen within the lock.

The next step would be to introduce a function that can only
be called from outside the BQL, address_space_rw_unlocked.  The
function would do something like

    mr = address_space_translate(...)
    if (!mr->unlocked) {
        locked = true;
        qemu_mutex_lock_iothread();
    }
    ...dispatch...
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

(Note that subpages are already ready to be changed to unlocked
access).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cputlb.c             |  4 +++-
 exec.c               | 30 ++++++++++++++++++++++++++----
 hw/ppc/spapr_iommu.c | 10 +++++++++-
 3 files changed, 38 insertions(+), 6 deletions(-)
Jan Kiszka - June 28, 2013, 7:38 p.m.
On 2013-06-28 20:26, Paolo Bonzini wrote:
> With this change, address space dispatch can be moved outside the
> BQL.  The actual I/O would still have to happen within the lock.
> 
> The next step would be to introduce a function that can only
> be called from outside the BQL, address_space_rw_unlocked.  The
> function would do something like
> 
>     mr = address_space_translate(...)
>     if (!mr->unlocked) {
>         locked = true;
>         qemu_mutex_lock_iothread();
>     }
>     ...dispatch...
>     if (locked) {
>         qemu_mutex_unlock_iothread();
>     }
> 
> (Note that subpages are already ready to be changed to unlocked
> access).
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  cputlb.c             |  4 +++-
>  exec.c               | 30 ++++++++++++++++++++++++++----
>  hw/ppc/spapr_iommu.c | 10 +++++++++-
>  3 files changed, 38 insertions(+), 6 deletions(-)
> 
> diff --git a/cputlb.c b/cputlb.c
> index 82875b1..97bfe70 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -267,8 +267,9 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
>          tlb_add_large_page(env, vaddr, size);
>      }
>  
> +    rcu_read_lock();
>      sz = size;
> -    d = address_space_memory.dispatch;
> +    d = rcu_dereference(&address_space_memory.dispatch);
>      section = address_space_translate_for_iotlb(d, paddr, &xlat, &sz);
>      assert(sz >= TARGET_PAGE_SIZE);
>  
> @@ -321,6 +322,7 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
>      } else {
>          te->addr_write = -1;
>      }
> +    rcu_read_unlock();
>  }
>  
>  /* NOTE: this function can trigger an exception */
> diff --git a/exec.c b/exec.c
> index e564014..8c6f925 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -97,6 +97,8 @@ struct PhysPageEntry {
>  typedef PhysPageEntry Node[L2_SIZE];
>  
>  struct AddressSpaceDispatch {
> +    struct rcu_head rcu;
> +
>      /* This is a multi-level map on the physical address space.
>       * The bottom level has pointers to MemoryRegionSections.
>       */
> @@ -120,6 +122,8 @@ typedef struct subpage_t {
>  #define PHYS_SECTION_WATCH 3
>  
>  typedef struct PhysPageMap {
> +    struct rcu_head rcu;
> +
>      unsigned sections_nb;
>      unsigned sections_nb_alloc;
>      unsigned nodes_nb;
> @@ -236,6 +240,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
>          && mr != &io_mem_watch;
>  }
>  
> +/* Called from RCU critical section */
>  static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
>                                                          hwaddr addr,
>                                                          bool resolve_subpage)
> @@ -252,6 +257,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
>      return section;
>  }
>  
> +/* Called from RCU critical section */
>  static MemoryRegionSection *
>  address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
>                                   hwaddr *plen, bool resolve_subpage)
> @@ -280,8 +286,10 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
>      MemoryRegion *mr;
>      hwaddr len = *plen;
>  
> +    rcu_read_lock();
>      for (;;) {
> -        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
> +        AddressSpaceDispatch *d = rcu_dereference(&as->dispatch);
> +        section = address_space_translate_internal(d, addr, &addr, plen, true);
>          mr = section->mr;
>  
>          if (!mr->iommu_ops) {
> @@ -303,9 +311,11 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
>      *plen = len;
>      *xlat = addr;
>      memory_region_ref(mr);
> +    rcu_read_unlock();
>      return mr;
>  }

At this point, do we still have unowned memory regions? If so, we must
not return them here if the caller is not holding the BQL (which is
supposed to protect their registration/modification/deregistration).

I played with a version today that returns NULL in this case. Then the
caller of address_space_translate can take the BQL and retry the
translation.

Jan
Paolo Bonzini - July 1, 2013, 11:48 a.m.
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Il 28/06/2013 21:38, Jan Kiszka ha scritto:
> At this point, do we still have unowned memory regions?

Yes, for stuff that is registered by the boards or by non-qdevified
devices.  In either case, they cannot disappear so it's fine that you
call address_space_translate outside the BQL---even if they lack an owner.

Paolo

> If so, we must not return them here if the caller is not holding 
> the BQL (which is supposed to protect their 
> registration/modification/deregistration).
> 
> I played with a version today that returns NULL in this case. Then 
> the caller of address_space_translate can take the BQL and retry 
> the translation.
> 
> Jan
> 
> 

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.19 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/

iQIcBAEBAgAGBQJR0WyRAAoJEBvWZb6bTYbyJioP/jj39ehtAAjJA0I3heu6oCwW
fGezYd20NnFQg7kWzNceJM52RT7pIqX/z1a1YRYWHz+izk6ZyfJ36EM0OOa4QSJK
yz03mpcSh52XS+kDPzlMYs6OpKRcIPCcR3WU8CP3/ic1lU/74VlXT5jIme0omIk7
3IWBviwrsV69Fz++2kKOTmmPjT63gudKOuZbCrvHnIkwatg3U6wQVDkOn/rFnVCC
S+LjVJpdBDO9m4y4kwGtSsboepO+Yhv31Q2HihLFOFUG9qwBkeqASIQl/kYxjIGl
J17gS37waujKLBNdUEYvzTf9km/XUppc8Bdp1IjPQo6bLJeB6UFFTTkmjdH2qmLn
Qy6eO7LrS1ag7NCaJqY1tZQFyp+fkRf5TTAlf6to7N/4pmb3jFIOueLohQGDeL8b
m6yJmmViaAGRdX1dcpzu7kYlwHpQkAccK/Bg6LnvP9iF3DC/vulVupiDl6BIwuDF
qYzOuQyIqQdp2m2Xf8x5+VJgwLSjHV8I7PxKwFD3q6pIhqRrmKuwc9TgWHQwVBWA
04X/PclkMXfAo82Pcuncztibk+Ft9X7U2svSy2OLsZC9c1qY+BUwpNPKRIBbOdM4
2mZsJCMqDvLZwf6GHKAt4NqY0dyRycHENIqPYcfO+CK2FGsodf4/xQ27/g/5l1Zu
+oflF+G6n8yKtEz0Qz2q
=S4/P
-----END PGP SIGNATURE-----

Patch

diff --git a/cputlb.c b/cputlb.c
index 82875b1..97bfe70 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -267,8 +267,9 @@  void tlb_set_page(CPUArchState *env, target_ulong vaddr,
         tlb_add_large_page(env, vaddr, size);
     }
 
+    rcu_read_lock();
     sz = size;
-    d = address_space_memory.dispatch;
+    d = rcu_dereference(&address_space_memory.dispatch);
     section = address_space_translate_for_iotlb(d, paddr, &xlat, &sz);
     assert(sz >= TARGET_PAGE_SIZE);
 
@@ -321,6 +322,7 @@  void tlb_set_page(CPUArchState *env, target_ulong vaddr,
     } else {
         te->addr_write = -1;
     }
+    rcu_read_unlock();
 }
 
 /* NOTE: this function can trigger an exception */
diff --git a/exec.c b/exec.c
index e564014..8c6f925 100644
--- a/exec.c
+++ b/exec.c
@@ -97,6 +97,8 @@  struct PhysPageEntry {
 typedef PhysPageEntry Node[L2_SIZE];
 
 struct AddressSpaceDispatch {
+    struct rcu_head rcu;
+
     /* This is a multi-level map on the physical address space.
      * The bottom level has pointers to MemoryRegionSections.
      */
@@ -120,6 +122,8 @@  typedef struct subpage_t {
 #define PHYS_SECTION_WATCH 3
 
 typedef struct PhysPageMap {
+    struct rcu_head rcu;
+
     unsigned sections_nb;
     unsigned sections_nb_alloc;
     unsigned nodes_nb;
@@ -236,6 +240,7 @@  bool memory_region_is_unassigned(MemoryRegion *mr)
         && mr != &io_mem_watch;
 }
 
+/* Called from RCU critical section */
 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                         hwaddr addr,
                                                         bool resolve_subpage)
@@ -252,6 +257,7 @@  static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
     return section;
 }
 
+/* Called from RCU critical section */
 static MemoryRegionSection *
 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                  hwaddr *plen, bool resolve_subpage)
@@ -280,8 +286,10 @@  MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
     MemoryRegion *mr;
     hwaddr len = *plen;
 
+    rcu_read_lock();
     for (;;) {
-        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
+        AddressSpaceDispatch *d = rcu_dereference(&as->dispatch);
+        section = address_space_translate_internal(d, addr, &addr, plen, true);
         mr = section->mr;
 
         if (!mr->iommu_ops) {
@@ -303,9 +311,11 @@  MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
     *plen = len;
     *xlat = addr;
     memory_region_ref(mr);
+    rcu_read_unlock();
     return mr;
 }
 
+/* Called from RCU critical section */
 MemoryRegionSection *
 address_space_translate_for_iotlb(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                   hwaddr *plen)
@@ -727,6 +737,7 @@  static int cpu_physical_memory_set_dirty_tracking(int enable)
     return ret;
 }
 
+/* Called from RCU critical section */
 hwaddr memory_region_section_get_iotlb(AddressSpaceDispatch *d,
                                        CPUArchState *env,
                                        MemoryRegionSection *section,
@@ -1706,6 +1717,11 @@  static uint16_t dummy_section(MemoryRegion *mr)
 
 MemoryRegion *iotlb_to_region(hwaddr index)
 {
+    /* This assumes that address_space_memory.dispatch->sections
+     * does not change between address_space_translate_for_iotlb and
+     * here.  This is currently true because TCG runs within the
+     * BQL.
+     */
     return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
 }
 
@@ -1762,11 +1778,12 @@  static void core_begin(MemoryListener *listener)
 }
 
 /* This listener's commit run after the other AddressSpaceDispatch listeners'.
- * All AddressSpaceDispatch instances have switched to the next map.
+ * All AddressSpaceDispatch instances have switched to the next map, but
+ * there may be concurrent readers.
  */
 static void core_commit(MemoryListener *listener)
 {
-    phys_sections_free(prev_map);
+    call_rcu(prev_map, phys_sections_free, rcu);
 }
 
 static void tcg_commit(MemoryListener *listener)
@@ -1816,13 +1833,18 @@  void address_space_init_dispatch(AddressSpace *as)
     memory_listener_register(&as->dispatch_listener, as);
 }
 
+static void address_space_dispatch_free(AddressSpaceDispatch *d)
+{
+    g_free(d);
+}
+
 void address_space_destroy_dispatch(AddressSpace *as)
 {
     AddressSpaceDispatch *d = as->dispatch;
 
     memory_listener_unregister(&as->dispatch_listener);
-    g_free(d);
     as->dispatch = NULL;
+    call_rcu(d, address_space_dispatch_free, rcu);
 }
 
 static void memory_map_init(void)
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 89b33a5..e16b94b 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -37,6 +37,7 @@  enum sPAPRTCEAccess {
 };
 
 struct sPAPRTCETable {
+    struct rcu_head rcu;
     uint32_t liobn;
     uint32_t window_size;
     sPAPRTCE *table;
@@ -68,6 +69,8 @@  static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
     return NULL;
 }
 
+/* Called within RCU critical section */
+
 static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
 {
     sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
@@ -159,7 +162,7 @@  sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, size_t wi
     return tcet;
 }
 
-void spapr_tce_free(sPAPRTCETable *tcet)
+static void spapr_tce_do_free(sPAPRTCETable *tcet)
 {
     QLIST_REMOVE(tcet, list);
 
@@ -172,6 +175,11 @@  void spapr_tce_free(sPAPRTCETable *tcet)
     g_free(tcet);
 }
 
+void spapr_tce_free(sPAPRTCETable *tcet)
+{
+    call_rcu(tcet, spapr_tce_do_free, rcu);
+}
+
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet)
 {
     return &tcet->iommu;