Patchwork [1/1] acpi_init: move to full RCU free for IO mappings

login
register
mail settings
Submitter Andy Whitcroft
Date Sept. 23, 2011, 3:50 p.m.
Message ID <1316793027-18367-2-git-send-email-apw@canonical.com>
Download mbox | patch
Permalink /patch/116113/
State New
Headers show

Comments

Andy Whitcroft - Sept. 23, 2011, 3:50 p.m.
During acpi_init we map and unmap a large number of mappings.  The existing
code uses 'simple' RCU based free, triggering a full sychronize_rcu()
against the caller.  If the machine is busy (for example unpacking the
initramfs) at the time this triggers a significant delay until all cpus
rendevous.  Move those to a delayed RCU free callback.

For our reference platform this cuts the average acpi_init time in half,
dropping boot time by .25s.

Signed-off-by: Andy Whitcroft <apw@canonical.com>
---
 drivers/acpi/atomicio.c |   12 +++++++++---
 drivers/acpi/osl.c      |   16 +++++++++++-----
 2 files changed, 20 insertions(+), 8 deletions(-)
Tim Gardner - Sept. 23, 2011, 4:23 p.m.
On 09/23/2011 09:50 AM, Andy Whitcroft wrote:
> During acpi_init we map and unmap a large number of mappings.  The existing
> code uses 'simple' RCU based free, triggering a full sychronize_rcu()
> against the caller.  If the machine is busy (for example unpacking the
> initramfs) at the time this triggers a significant delay until all cpus
> rendevous.  Move those to a delayed RCU free callback.
>
> For our reference platform this cuts the average acpi_init time in half,
> dropping boot time by .25s.
>
> Signed-off-by: Andy Whitcroft<apw@canonical.com>
> ---
>   drivers/acpi/atomicio.c |   12 +++++++++---
>   drivers/acpi/osl.c      |   16 +++++++++++-----
>   2 files changed, 20 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c
> index 7489b89..0a7a554 100644
> --- a/drivers/acpi/atomicio.c
> +++ b/drivers/acpi/atomicio.c
> @@ -49,6 +49,7 @@ struct acpi_iomap {
>   	unsigned long size;
>   	phys_addr_t paddr;
>   	struct kref ref;
> +	struct rcu_head iomap_rcu;
>   };
>
>   /* acpi_iomaps_lock or RCU read lock must be held before calling */
> @@ -161,6 +162,13 @@ static void __acpi_kref_del_iomap(struct kref *ref)
>    * Used to post-unmap the specified IO memory area. The iounmap is
>    * done only if the reference count goes zero.
>    */
> +static void acpi_post_unmap_rcu(struct rcu_head *rcu)
> +{
> +	struct acpi_iomap *map = container_of(rcu,
> +					struct acpi_iomap, iomap_rcu);
> +	iounmap(map->vaddr);
> +	kfree(map);
> +}
>   static void acpi_post_unmap(phys_addr_t paddr, unsigned long size)
>   {
>   	struct acpi_iomap *map;
> @@ -176,9 +184,7 @@ static void acpi_post_unmap(phys_addr_t paddr, unsigned long size)
>   	if (!del)
>   		return;
>
> -	synchronize_rcu();
> -	iounmap(map->vaddr);
> -	kfree(map);
> +	call_rcu(&map->iomap_rcu, acpi_post_unmap_rcu);
>   }
>
>   /* In NMI handler, should set silent = 1 */
> diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
> index 372f9b7..9b3a520 100644
> --- a/drivers/acpi/osl.c
> +++ b/drivers/acpi/osl.c
> @@ -105,6 +105,7 @@ struct acpi_ioremap {
>   	acpi_physical_address phys;
>   	acpi_size size;
>   	unsigned long refcount;
> +	struct rcu_head map_rcu;
>   };
>
>   static LIST_HEAD(acpi_ioremaps);
> @@ -373,13 +374,18 @@ static void acpi_os_drop_map_ref(struct acpi_ioremap *map)
>   		list_del_rcu(&map->list);
>   }
>
> +static void acpi_os_map_cleanup_rcu(struct rcu_head *rcu)
> +{
> +	struct acpi_ioremap *map = container_of(rcu,
> +					struct acpi_ioremap, map_rcu);
> +	iounmap(map->virt);
> +	kfree(map);
> +}
> +
>   static void acpi_os_map_cleanup(struct acpi_ioremap *map)
>   {
> -	if (!map->refcount) {
> -		synchronize_rcu();
> -		iounmap(map->virt);
> -		kfree(map);
> -	}
> +	if (!map->refcount)
> +		call_rcu(&map->map_rcu, acpi_os_map_cleanup_rcu);
>   }
>
>   void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size)

Should this be 'UBUNTU: SAUCE:' until its upstreamed (which I think is a 
reasonable thing to do)?

Acked-by: Tim Gardner <tim.gardner@canonical.com>
Andy Whitcroft - Sept. 23, 2011, 4:56 p.m.
On Fri, Sep 23, 2011 at 10:23:03AM -0600, Tim Gardner wrote:
> On 09/23/2011 09:50 AM, Andy Whitcroft wrote:
> >During acpi_init we map and unmap a large number of mappings.  The existing
> >code uses 'simple' RCU based free, triggering a full sychronize_rcu()
> >against the caller.  If the machine is busy (for example unpacking the
> >initramfs) at the time this triggers a significant delay until all cpus
> >rendevous.  Move those to a delayed RCU free callback.
> >
> >For our reference platform this cuts the average acpi_init time in half,
> >dropping boot time by .25s.
> >
> >Signed-off-by: Andy Whitcroft<apw@canonical.com>
> >---
> >  drivers/acpi/atomicio.c |   12 +++++++++---
> >  drivers/acpi/osl.c      |   16 +++++++++++-----
> >  2 files changed, 20 insertions(+), 8 deletions(-)
> >
> >diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c
> >index 7489b89..0a7a554 100644
> >--- a/drivers/acpi/atomicio.c
> >+++ b/drivers/acpi/atomicio.c
> >@@ -49,6 +49,7 @@ struct acpi_iomap {
> >  	unsigned long size;
> >  	phys_addr_t paddr;
> >  	struct kref ref;
> >+	struct rcu_head iomap_rcu;
> >  };
> >
> >  /* acpi_iomaps_lock or RCU read lock must be held before calling */
> >@@ -161,6 +162,13 @@ static void __acpi_kref_del_iomap(struct kref *ref)
> >   * Used to post-unmap the specified IO memory area. The iounmap is
> >   * done only if the reference count goes zero.
> >   */
> >+static void acpi_post_unmap_rcu(struct rcu_head *rcu)
> >+{
> >+	struct acpi_iomap *map = container_of(rcu,
> >+					struct acpi_iomap, iomap_rcu);
> >+	iounmap(map->vaddr);
> >+	kfree(map);
> >+}
> >  static void acpi_post_unmap(phys_addr_t paddr, unsigned long size)
> >  {
> >  	struct acpi_iomap *map;
> >@@ -176,9 +184,7 @@ static void acpi_post_unmap(phys_addr_t paddr, unsigned long size)
> >  	if (!del)
> >  		return;
> >
> >-	synchronize_rcu();
> >-	iounmap(map->vaddr);
> >-	kfree(map);
> >+	call_rcu(&map->iomap_rcu, acpi_post_unmap_rcu);
> >  }
> >
> >  /* In NMI handler, should set silent = 1 */
> >diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
> >index 372f9b7..9b3a520 100644
> >--- a/drivers/acpi/osl.c
> >+++ b/drivers/acpi/osl.c
> >@@ -105,6 +105,7 @@ struct acpi_ioremap {
> >  	acpi_physical_address phys;
> >  	acpi_size size;
> >  	unsigned long refcount;
> >+	struct rcu_head map_rcu;
> >  };
> >
> >  static LIST_HEAD(acpi_ioremaps);
> >@@ -373,13 +374,18 @@ static void acpi_os_drop_map_ref(struct acpi_ioremap *map)
> >  		list_del_rcu(&map->list);
> >  }
> >
> >+static void acpi_os_map_cleanup_rcu(struct rcu_head *rcu)
> >+{
> >+	struct acpi_ioremap *map = container_of(rcu,
> >+					struct acpi_ioremap, map_rcu);
> >+	iounmap(map->virt);
> >+	kfree(map);
> >+}
> >+
> >  static void acpi_os_map_cleanup(struct acpi_ioremap *map)
> >  {
> >-	if (!map->refcount) {
> >-		synchronize_rcu();
> >-		iounmap(map->virt);
> >-		kfree(map);
> >-	}
> >+	if (!map->refcount)
> >+		call_rcu(&map->map_rcu, acpi_os_map_cleanup_rcu);
> >  }
> >
> >  void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size)
> 
> Should this be 'UBUNTU: SAUCE:' until its upstreamed (which I think
> is a reasonable thing to do)?

Yes it should.

> Acked-by: Tim Gardner <tim.gardner@canonical.com>

-apw
John Johansen - Sept. 23, 2011, 4:56 p.m.
On 09/23/2011 08:50 AM, Andy Whitcroft wrote:
> During acpi_init we map and unmap a large number of mappings.  The existing
> code uses 'simple' RCU based free, triggering a full sychronize_rcu()
> against the caller.  If the machine is busy (for example unpacking the
> initramfs) at the time this triggers a significant delay until all cpus
> rendevous.  Move those to a delayed RCU free callback.
> 
> For our reference platform this cuts the average acpi_init time in half,
> dropping boot time by .25s.
> 
> Signed-off-by: Andy Whitcroft <apw@canonical.com>
looks good

Acked-by: John Johansen <john.johansen@canonical.com>

> ---
>  drivers/acpi/atomicio.c |   12 +++++++++---
>  drivers/acpi/osl.c      |   16 +++++++++++-----
>  2 files changed, 20 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c
> index 7489b89..0a7a554 100644
> --- a/drivers/acpi/atomicio.c
> +++ b/drivers/acpi/atomicio.c
> @@ -49,6 +49,7 @@ struct acpi_iomap {
>  	unsigned long size;
>  	phys_addr_t paddr;
>  	struct kref ref;
> +	struct rcu_head iomap_rcu;
>  };
>  
>  /* acpi_iomaps_lock or RCU read lock must be held before calling */
> @@ -161,6 +162,13 @@ static void __acpi_kref_del_iomap(struct kref *ref)
>   * Used to post-unmap the specified IO memory area. The iounmap is
>   * done only if the reference count goes zero.
>   */
> +static void acpi_post_unmap_rcu(struct rcu_head *rcu)
> +{
> +	struct acpi_iomap *map = container_of(rcu, 
> +					struct acpi_iomap, iomap_rcu);
> +	iounmap(map->vaddr);
> +	kfree(map);
> +}
>  static void acpi_post_unmap(phys_addr_t paddr, unsigned long size)
>  {
>  	struct acpi_iomap *map;
> @@ -176,9 +184,7 @@ static void acpi_post_unmap(phys_addr_t paddr, unsigned long size)
>  	if (!del)
>  		return;
>  
> -	synchronize_rcu();
> -	iounmap(map->vaddr);
> -	kfree(map);
> +	call_rcu(&map->iomap_rcu, acpi_post_unmap_rcu);
>  }
>  
>  /* In NMI handler, should set silent = 1 */
> diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
> index 372f9b7..9b3a520 100644
> --- a/drivers/acpi/osl.c
> +++ b/drivers/acpi/osl.c
> @@ -105,6 +105,7 @@ struct acpi_ioremap {
>  	acpi_physical_address phys;
>  	acpi_size size;
>  	unsigned long refcount;
> +	struct rcu_head map_rcu;
>  };
>  
>  static LIST_HEAD(acpi_ioremaps);
> @@ -373,13 +374,18 @@ static void acpi_os_drop_map_ref(struct acpi_ioremap *map)
>  		list_del_rcu(&map->list);
>  }
>  
> +static void acpi_os_map_cleanup_rcu(struct rcu_head *rcu)
> +{
> +	struct acpi_ioremap *map = container_of(rcu,
> +					struct acpi_ioremap, map_rcu);
> +	iounmap(map->virt);
> +	kfree(map);
> +}
> +
>  static void acpi_os_map_cleanup(struct acpi_ioremap *map)
>  {
> -	if (!map->refcount) {
> -		synchronize_rcu();
> -		iounmap(map->virt);
> -		kfree(map);
> -	}
> +	if (!map->refcount)
> +		call_rcu(&map->map_rcu, acpi_os_map_cleanup_rcu);
>  }
>  
>  void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size)
Leann Ogasawara - Sept. 23, 2011, 4:58 p.m.
On Fri, 2011-09-23 at 16:50 +0100, Andy Whitcroft wrote:
> During acpi_init we map and unmap a large number of mappings.  The existing
> code uses 'simple' RCU based free, triggering a full sychronize_rcu()
> against the caller.  If the machine is busy (for example unpacking the
> initramfs) at the time this triggers a significant delay until all cpus
> rendevous.  Move those to a delayed RCU free callback.
> 
> For our reference platform this cuts the average acpi_init time in half,
> dropping boot time by .25s.
> 
> Signed-off-by: Andy Whitcroft <apw@canonical.com>

Acked-by: Leann Ogasawara <leann.ogasawara@canonical.com>

Applied to Oneiric master-next.  As you noted, this is a performance
improvement so I'm going to let it bake in master-next until the first
Oneiric SRU.

Thanks,
Leann

> ---
>  drivers/acpi/atomicio.c |   12 +++++++++---
>  drivers/acpi/osl.c      |   16 +++++++++++-----
>  2 files changed, 20 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c
> index 7489b89..0a7a554 100644
> --- a/drivers/acpi/atomicio.c
> +++ b/drivers/acpi/atomicio.c
> @@ -49,6 +49,7 @@ struct acpi_iomap {
>  	unsigned long size;
>  	phys_addr_t paddr;
>  	struct kref ref;
> +	struct rcu_head iomap_rcu;
>  };
>  
>  /* acpi_iomaps_lock or RCU read lock must be held before calling */
> @@ -161,6 +162,13 @@ static void __acpi_kref_del_iomap(struct kref *ref)
>   * Used to post-unmap the specified IO memory area. The iounmap is
>   * done only if the reference count goes zero.
>   */
> +static void acpi_post_unmap_rcu(struct rcu_head *rcu)
> +{
> +	struct acpi_iomap *map = container_of(rcu, 
> +					struct acpi_iomap, iomap_rcu);
> +	iounmap(map->vaddr);
> +	kfree(map);
> +}
>  static void acpi_post_unmap(phys_addr_t paddr, unsigned long size)
>  {
>  	struct acpi_iomap *map;
> @@ -176,9 +184,7 @@ static void acpi_post_unmap(phys_addr_t paddr, unsigned long size)
>  	if (!del)
>  		return;
>  
> -	synchronize_rcu();
> -	iounmap(map->vaddr);
> -	kfree(map);
> +	call_rcu(&map->iomap_rcu, acpi_post_unmap_rcu);
>  }
>  
>  /* In NMI handler, should set silent = 1 */
> diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
> index 372f9b7..9b3a520 100644
> --- a/drivers/acpi/osl.c
> +++ b/drivers/acpi/osl.c
> @@ -105,6 +105,7 @@ struct acpi_ioremap {
>  	acpi_physical_address phys;
>  	acpi_size size;
>  	unsigned long refcount;
> +	struct rcu_head map_rcu;
>  };
>  
>  static LIST_HEAD(acpi_ioremaps);
> @@ -373,13 +374,18 @@ static void acpi_os_drop_map_ref(struct acpi_ioremap *map)
>  		list_del_rcu(&map->list);
>  }
>  
> +static void acpi_os_map_cleanup_rcu(struct rcu_head *rcu)
> +{
> +	struct acpi_ioremap *map = container_of(rcu,
> +					struct acpi_ioremap, map_rcu);
> +	iounmap(map->virt);
> +	kfree(map);
> +}
> +
>  static void acpi_os_map_cleanup(struct acpi_ioremap *map)
>  {
> -	if (!map->refcount) {
> -		synchronize_rcu();
> -		iounmap(map->virt);
> -		kfree(map);
> -	}
> +	if (!map->refcount)
> +		call_rcu(&map->map_rcu, acpi_os_map_cleanup_rcu);
>  }
>  
>  void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size)
> -- 
> 1.7.4.1
> 
>

Patch

diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c
index 7489b89..0a7a554 100644
--- a/drivers/acpi/atomicio.c
+++ b/drivers/acpi/atomicio.c
@@ -49,6 +49,7 @@  struct acpi_iomap {
 	unsigned long size;
 	phys_addr_t paddr;
 	struct kref ref;
+	struct rcu_head iomap_rcu;
 };
 
 /* acpi_iomaps_lock or RCU read lock must be held before calling */
@@ -161,6 +162,13 @@  static void __acpi_kref_del_iomap(struct kref *ref)
  * Used to post-unmap the specified IO memory area. The iounmap is
  * done only if the reference count goes zero.
  */
+static void acpi_post_unmap_rcu(struct rcu_head *rcu)
+{
+	struct acpi_iomap *map = container_of(rcu, 
+					struct acpi_iomap, iomap_rcu);
+	iounmap(map->vaddr);
+	kfree(map);
+}
 static void acpi_post_unmap(phys_addr_t paddr, unsigned long size)
 {
 	struct acpi_iomap *map;
@@ -176,9 +184,7 @@  static void acpi_post_unmap(phys_addr_t paddr, unsigned long size)
 	if (!del)
 		return;
 
-	synchronize_rcu();
-	iounmap(map->vaddr);
-	kfree(map);
+	call_rcu(&map->iomap_rcu, acpi_post_unmap_rcu);
 }
 
 /* In NMI handler, should set silent = 1 */
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 372f9b7..9b3a520 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -105,6 +105,7 @@  struct acpi_ioremap {
 	acpi_physical_address phys;
 	acpi_size size;
 	unsigned long refcount;
+	struct rcu_head map_rcu;
 };
 
 static LIST_HEAD(acpi_ioremaps);
@@ -373,13 +374,18 @@  static void acpi_os_drop_map_ref(struct acpi_ioremap *map)
 		list_del_rcu(&map->list);
 }
 
+static void acpi_os_map_cleanup_rcu(struct rcu_head *rcu)
+{
+	struct acpi_ioremap *map = container_of(rcu,
+					struct acpi_ioremap, map_rcu);
+	iounmap(map->virt);
+	kfree(map);
+}
+
 static void acpi_os_map_cleanup(struct acpi_ioremap *map)
 {
-	if (!map->refcount) {
-		synchronize_rcu();
-		iounmap(map->virt);
-		kfree(map);
-	}
+	if (!map->refcount)
+		call_rcu(&map->map_rcu, acpi_os_map_cleanup_rcu);
 }
 
 void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size)