diff mbox

[RFCv2,4/9] arch/powerpc: Clean up memory hotplug failure paths

Message ID 1454045043-25545-5-git-send-email-david@gibson.dropbear.id.au (mailing list archive)
State Superseded
Headers show

Commit Message

David Gibson Jan. 29, 2016, 5:23 a.m. UTC
This makes a number of cleanups to handling of mapping failures during
memory hotplug on Power:

For errors creating the linear mapping for the hot-added region:
  * This is now reported with EFAULT which is more appropriate than the
    previous EINVAL (the failure is unlikely to be related to the
    function's parameters)
  * An error in this path now prints a warning message, rather than just
    silently failing to add the extra memory.
  * Previously a failure here could result in the region being partially
    mapped.  We now clean up any partial mapping before failing.

For errors creating the vmemmap for the hot-added region:
   * This is now reported with EFAULT instead of causing a BUG() - this
     could happen for external reason (e.g. full hash table) so it's better
     to handle this non-fatally
   * An error message is also printed, so the failure won't be silent
   * As above a failure could cause a partially mapped region, we now
     clean this up.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 arch/powerpc/mm/hash_utils_64.c | 13 ++++++++++---
 arch/powerpc/mm/init_64.c       | 38 ++++++++++++++++++++++++++------------
 arch/powerpc/mm/mem.c           | 10 ++++++++--
 3 files changed, 44 insertions(+), 17 deletions(-)

Comments

Anshuman Khandual Feb. 1, 2016, 6:29 a.m. UTC | #1
On 01/29/2016 10:53 AM, David Gibson wrote:
> This makes a number of cleanups to handling of mapping failures during
> memory hotplug on Power:
> 
> For errors creating the linear mapping for the hot-added region:
>   * This is now reported with EFAULT which is more appropriate than the
>     previous EINVAL (the failure is unlikely to be related to the
>     function's parameters)
>   * An error in this path now prints a warning message, rather than just
>     silently failing to add the extra memory.
>   * Previously a failure here could result in the region being partially
>     mapped.  We now clean up any partial mapping before failing.
> 
> For errors creating the vmemmap for the hot-added region:
>    * This is now reported with EFAULT instead of causing a BUG() - this
>      could happen for external reason (e.g. full hash table) so it's better
>      to handle this non-fatally
>    * An error message is also printed, so the failure won't be silent
>    * As above a failure could cause a partially mapped region, we now
>      clean this up.

Yeah, this greatly improves graceful fallback when a memory mapping
failure happens at the last level during memory hotplug.
Nathan Fontenot Feb. 2, 2016, 3:04 p.m. UTC | #2
On 01/28/2016 11:23 PM, David Gibson wrote:
> This makes a number of cleanups to handling of mapping failures during
> memory hotplug on Power:
> 
> For errors creating the linear mapping for the hot-added region:
>   * This is now reported with EFAULT which is more appropriate than the
>     previous EINVAL (the failure is unlikely to be related to the
>     function's parameters)
>   * An error in this path now prints a warning message, rather than just
>     silently failing to add the extra memory.
>   * Previously a failure here could result in the region being partially
>     mapped.  We now clean up any partial mapping before failing.
> 
> For errors creating the vmemmap for the hot-added region:
>    * This is now reported with EFAULT instead of causing a BUG() - this
>      could happen for external reason (e.g. full hash table) so it's better
>      to handle this non-fatally
>    * An error message is also printed, so the failure won't be silent
>    * As above a failure could cause a partially mapped region, we now
>      clean this up.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  arch/powerpc/mm/hash_utils_64.c | 13 ++++++++++---
>  arch/powerpc/mm/init_64.c       | 38 ++++++++++++++++++++++++++------------
>  arch/powerpc/mm/mem.c           | 10 ++++++++--
>  3 files changed, 44 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index 0737eae..e88a86e 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -635,9 +635,16 @@ static unsigned long __init htab_get_table_size(void)
>  #ifdef CONFIG_MEMORY_HOTPLUG
>  int create_section_mapping(unsigned long start, unsigned long end)
>  {
> -	return htab_bolt_mapping(start, end, __pa(start),
> -				 pgprot_val(PAGE_KERNEL), mmu_linear_psize,
> -				 mmu_kernel_ssize);
> +	int rc = htab_bolt_mapping(start, end, __pa(start),
> +				   pgprot_val(PAGE_KERNEL), mmu_linear_psize,
> +				   mmu_kernel_ssize);
> +
> +	if (rc < 0) {
> +		int rc2 = htab_remove_mapping(start, end, mmu_linear_psize,
> +					      mmu_kernel_ssize);
> +		BUG_ON(rc2 && (rc2 != -ENOENT));
> +	}
> +	return rc;
>  }
>  

<-- snip -->

>  #ifdef CONFIG_MEMORY_HOTPLUG
> @@ -217,15 +219,20 @@ static void vmemmap_remove_mapping(unsigned long start,
>  }
>  #endif
>  #else /* CONFIG_PPC_BOOK3E */
> -static void __meminit vmemmap_create_mapping(unsigned long start,
> -					     unsigned long page_size,
> -					     unsigned long phys)
> +static int __meminit vmemmap_create_mapping(unsigned long start,
> +					    unsigned long page_size,
> +					    unsigned long phys)
>  {
> -	int  mapped = htab_bolt_mapping(start, start + page_size, phys,
> -					pgprot_val(PAGE_KERNEL),
> -					mmu_vmemmap_psize,
> -					mmu_kernel_ssize);
> -	BUG_ON(mapped < 0);
> +	int rc = htab_bolt_mapping(start, start + page_size, phys,
> +				   pgprot_val(PAGE_KERNEL),
> +				   mmu_vmemmap_psize, mmu_kernel_ssize);
> +	if (rc < 0) {
> +		int rc2 = htab_remove_mapping(start, start + page_size,
> +					      mmu_vmemmap_psize,
> +					      mmu_kernel_ssize);
> +		BUG_ON(rc2 && (rc2 != -ENOENT));
> +	}
> +	return rc;
>  }
>  

If I'm reading this correctly it appears that create_section_mapping() and
vmemmap_create_mapping() for !PPC_BOOK3E are identical. Any reason to not
have one routine, perhaps have vmemmap_create_mapping() just call
create_section_mapping()?

-Nathan
David Gibson Feb. 3, 2016, 4:31 a.m. UTC | #3
On Tue, Feb 02, 2016 at 09:04:23AM -0600, Nathan Fontenot wrote:
> On 01/28/2016 11:23 PM, David Gibson wrote:
> > This makes a number of cleanups to handling of mapping failures during
> > memory hotplug on Power:
> > 
> > For errors creating the linear mapping for the hot-added region:
> >   * This is now reported with EFAULT which is more appropriate than the
> >     previous EINVAL (the failure is unlikely to be related to the
> >     function's parameters)
> >   * An error in this path now prints a warning message, rather than just
> >     silently failing to add the extra memory.
> >   * Previously a failure here could result in the region being partially
> >     mapped.  We now clean up any partial mapping before failing.
> > 
> > For errors creating the vmemmap for the hot-added region:
> >    * This is now reported with EFAULT instead of causing a BUG() - this
> >      could happen for external reason (e.g. full hash table) so it's better
> >      to handle this non-fatally
> >    * An error message is also printed, so the failure won't be silent
> >    * As above a failure could cause a partially mapped region, we now
> >      clean this up.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  arch/powerpc/mm/hash_utils_64.c | 13 ++++++++++---
> >  arch/powerpc/mm/init_64.c       | 38 ++++++++++++++++++++++++++------------
> >  arch/powerpc/mm/mem.c           | 10 ++++++++--
> >  3 files changed, 44 insertions(+), 17 deletions(-)
> > 
> > diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> > index 0737eae..e88a86e 100644
> > --- a/arch/powerpc/mm/hash_utils_64.c
> > +++ b/arch/powerpc/mm/hash_utils_64.c
> > @@ -635,9 +635,16 @@ static unsigned long __init htab_get_table_size(void)
> >  #ifdef CONFIG_MEMORY_HOTPLUG
> >  int create_section_mapping(unsigned long start, unsigned long end)
> >  {
> > -	return htab_bolt_mapping(start, end, __pa(start),
> > -				 pgprot_val(PAGE_KERNEL), mmu_linear_psize,
> > -				 mmu_kernel_ssize);
> > +	int rc = htab_bolt_mapping(start, end, __pa(start),
> > +				   pgprot_val(PAGE_KERNEL), mmu_linear_psize,
> > +				   mmu_kernel_ssize);
> > +
> > +	if (rc < 0) {
> > +		int rc2 = htab_remove_mapping(start, end, mmu_linear_psize,
> > +					      mmu_kernel_ssize);
> > +		BUG_ON(rc2 && (rc2 != -ENOENT));
> > +	}
> > +	return rc;
> >  }
> >  
> 
> <-- snip -->
> 
> >  #ifdef CONFIG_MEMORY_HOTPLUG
> > @@ -217,15 +219,20 @@ static void vmemmap_remove_mapping(unsigned long start,
> >  }
> >  #endif
> >  #else /* CONFIG_PPC_BOOK3E */
> > -static void __meminit vmemmap_create_mapping(unsigned long start,
> > -					     unsigned long page_size,
> > -					     unsigned long phys)
> > +static int __meminit vmemmap_create_mapping(unsigned long start,
> > +					    unsigned long page_size,
> > +					    unsigned long phys)
> >  {
> > -	int  mapped = htab_bolt_mapping(start, start + page_size, phys,
> > -					pgprot_val(PAGE_KERNEL),
> > -					mmu_vmemmap_psize,
> > -					mmu_kernel_ssize);
> > -	BUG_ON(mapped < 0);
> > +	int rc = htab_bolt_mapping(start, start + page_size, phys,
> > +				   pgprot_val(PAGE_KERNEL),
> > +				   mmu_vmemmap_psize, mmu_kernel_ssize);
> > +	if (rc < 0) {
> > +		int rc2 = htab_remove_mapping(start, start + page_size,
> > +					      mmu_vmemmap_psize,
> > +					      mmu_kernel_ssize);
> > +		BUG_ON(rc2 && (rc2 != -ENOENT));
> > +	}
> > +	return rc;
> >  }
> >  
> 
> If I'm reading this correctly it appears that create_section_mapping() and
> vmemmap_create_mapping() for !PPC_BOOK3E are identical. Any reason to not
> have one routine, perhaps just have vmemmap_create_mapping() just call
> create_section_mapping()?

Not really, apart from documenting what they're used for.  They're
both fairly trivial wrappers around htab_bolt_mapping().  I think
cleaning this up is outside the scope of this series though.
Paul Mackerras Feb. 8, 2016, 5:47 a.m. UTC | #4
On Fri, Jan 29, 2016 at 04:23:58PM +1100, David Gibson wrote:
> This makes a number of cleanups to handling of mapping failures during
> memory hotplug on Power:
> 
> For errors creating the linear mapping for the hot-added region:
>   * This is now reported with EFAULT which is more appropriate than the
>     previous EINVAL (the failure is unlikely to be related to the
>     function's parameters)
>   * An error in this path now prints a warning message, rather than just
>     silently failing to add the extra memory.
>   * Previously a failure here could result in the region being partially
>     mapped.  We now clean up any partial mapping before failing.
> 
> For errors creating the vmemmap for the hot-added region:
>    * This is now reported with EFAULT instead of causing a BUG() - this
>      could happen for external reason (e.g. full hash table) so it's better
>      to handle this non-fatally
>    * An error message is also printed, so the failure won't be silent
>    * As above a failure could cause a partially mapped region, we now
>      clean this up.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

Reviewed-by: Paul Mackerras <paulus@samba.org>
diff mbox

Patch

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0737eae..e88a86e 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -635,9 +635,16 @@  static unsigned long __init htab_get_table_size(void)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int create_section_mapping(unsigned long start, unsigned long end)
 {
-	return htab_bolt_mapping(start, end, __pa(start),
-				 pgprot_val(PAGE_KERNEL), mmu_linear_psize,
-				 mmu_kernel_ssize);
+	int rc = htab_bolt_mapping(start, end, __pa(start),
+				   pgprot_val(PAGE_KERNEL), mmu_linear_psize,
+				   mmu_kernel_ssize);
+
+	if (rc < 0) {
+		int rc2 = htab_remove_mapping(start, end, mmu_linear_psize,
+					      mmu_kernel_ssize);
+		BUG_ON(rc2 && (rc2 != -ENOENT));
+	}
+	return rc;
 }
 
 int remove_section_mapping(unsigned long start, unsigned long end)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index baa1a23..fbc9448 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -188,9 +188,9 @@  static int __meminit vmemmap_populated(unsigned long start, int page_size)
  */
 
 #ifdef CONFIG_PPC_BOOK3E
-static void __meminit vmemmap_create_mapping(unsigned long start,
-					     unsigned long page_size,
-					     unsigned long phys)
+static int __meminit vmemmap_create_mapping(unsigned long start,
+					    unsigned long page_size,
+					    unsigned long phys)
 {
 	/* Create a PTE encoding without page size */
 	unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
@@ -208,6 +208,8 @@  static void __meminit vmemmap_create_mapping(unsigned long start,
 	 */
 	for (i = 0; i < page_size; i += PAGE_SIZE)
 		BUG_ON(map_kernel_page(start + i, phys, flags));
+
+	return 0;
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -217,15 +219,20 @@  static void vmemmap_remove_mapping(unsigned long start,
 }
 #endif
 #else /* CONFIG_PPC_BOOK3E */
-static void __meminit vmemmap_create_mapping(unsigned long start,
-					     unsigned long page_size,
-					     unsigned long phys)
+static int __meminit vmemmap_create_mapping(unsigned long start,
+					    unsigned long page_size,
+					    unsigned long phys)
 {
-	int  mapped = htab_bolt_mapping(start, start + page_size, phys,
-					pgprot_val(PAGE_KERNEL),
-					mmu_vmemmap_psize,
-					mmu_kernel_ssize);
-	BUG_ON(mapped < 0);
+	int rc = htab_bolt_mapping(start, start + page_size, phys,
+				   pgprot_val(PAGE_KERNEL),
+				   mmu_vmemmap_psize, mmu_kernel_ssize);
+	if (rc < 0) {
+		int rc2 = htab_remove_mapping(start, start + page_size,
+					      mmu_vmemmap_psize,
+					      mmu_kernel_ssize);
+		BUG_ON(rc2 && (rc2 != -ENOENT));
+	}
+	return rc;
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -304,6 +311,7 @@  int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 
 	for (; start < end; start += page_size) {
 		void *p;
+		int rc;
 
 		if (vmemmap_populated(start, page_size))
 			continue;
@@ -317,7 +325,13 @@  int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 		pr_debug("      * %016lx..%016lx allocated at %p\n",
 			 start, start + page_size, p);
 
-		vmemmap_create_mapping(start, page_size, __pa(p));
+		rc = vmemmap_create_mapping(start, page_size, __pa(p));
+		if (rc < 0) {
+			pr_warning(
+				"vmemmap_populate: Unable to create vmemmap mapping: %d\n",
+				rc);
+			return -EFAULT;
+		}
 	}
 
 	return 0;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 22d94c3..8ffc1e2 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -119,12 +119,18 @@  int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 	struct zone *zone;
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
+	int rc;
 
 	pgdata = NODE_DATA(nid);
 
 	start = (unsigned long)__va(start);
-	if (create_section_mapping(start, start + size))
-		return -EINVAL;
+	rc = create_section_mapping(start, start + size);
+	if (rc) {
+		pr_warning(
+			"Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
+			start, start + size, rc);
+		return -EFAULT;
+	}
 
 	/* this should work for most non-highmem platforms */
 	zone = pgdata->node_zones +