[4/6] powerpc: Chunk calls to flush_dcache_range in arch_*_memory

Message ID 20190815041057.13627-5-alastair@au1.ibm.com (mailing list archive)
State Changes Requested
Headers show
Series powerpc: convert cache asm to C | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch next (da206bd46848568e1aaf35f00e2d78bf9bc94f95)
snowpatch_ozlabs/checkpatch warning total: 0 errors, 2 warnings, 0 checks, 45 lines checked

Commit Message

Alastair D'Silva Aug. 15, 2019, 4:10 a.m. UTC
From: Alastair D'Silva <alastair@d-silva.org>

When presented with large amounts of memory being hotplugged
(in my test case, ~890GB), the call to flush_dcache_range takes
a while (~50 seconds), triggering RCU stalls.

This patch breaks up the call into 16GB chunks, calling
cond_resched() in between to allow the scheduler to run.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
---
 arch/powerpc/mm/mem.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

Comments

Mike Rapoport Aug. 15, 2019, 6:54 a.m. UTC | #1
On Thu, Aug 15, 2019 at 02:10:49PM +1000, Alastair D'Silva wrote:
> From: Alastair D'Silva <alastair@d-silva.org>
> 
> When presented with large amounts of memory being hotplugged
> (in my test case, ~890GB), the call to flush_dcache_range takes
> a while (~50 seconds), triggering RCU stalls.
> 
> This patch breaks up the call into 16GB chunks, calling
> cond_resched() in between to allow the scheduler to run.
> 
> Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> ---
>  arch/powerpc/mm/mem.c | 16 ++++++++++++++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index 5400da87a804..fb0d5e9aa11b 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -104,11 +104,14 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
>  	return -ENODEV;
>  }
> 
> +#define FLUSH_CHUNK_SIZE (16ull * 1024ull * 1024ull * 1024ull)

IMHO this begs for adding SZ_16G to include/linux/sizes.h and using it here
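
For reference, a minimal sketch of what such an addition might look like, following the _AC() style sizes.h uses for constants wider than 32 bits (0x400000000 is 16 << 30); this is a hypothetical addition, not something currently in the tree:

	/* include/linux/sizes.h -- hypothetical addition suggested above */
	#define SZ_16G	_AC(0x400000000, ULL)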

> +
>  int __ref arch_add_memory(int nid, u64 start, u64 size,
>  			struct mhp_restrictions *restrictions)
>  {
>  	unsigned long start_pfn = start >> PAGE_SHIFT;
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
> +	unsigned long i;
>  	int rc;
> 
>  	resize_hpt_for_hotplug(memblock_phys_mem_size());
> @@ -120,7 +123,11 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
>  			start, start + size, rc);
>  		return -EFAULT;
>  	}
> -	flush_dcache_range(start, start + size);
> +
> +	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> +		flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE));
> +		cond_resched();
> +	}
> 
>  	return __add_pages(nid, start_pfn, nr_pages, restrictions);
>  }
> @@ -131,13 +138,18 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
>  	unsigned long start_pfn = start >> PAGE_SHIFT;
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
>  	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
> +	unsigned long i;
>  	int ret;
> 
>  	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
> 
>  	/* Remove htab bolted mappings for this section of memory */
>  	start = (unsigned long)__va(start);
> -	flush_dcache_range(start, start + size);
> +	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> +		flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE));
> +		cond_resched();
> +	}
> +
>  	ret = remove_section_mapping(start, start + size);
>  	WARN_ON_ONCE(ret);
> 
> -- 
> 2.21.0
>
Christophe Leroy Aug. 15, 2019, 7:36 a.m. UTC | #2
On 15/08/2019 at 06:10, Alastair D'Silva wrote:
> From: Alastair D'Silva <alastair@d-silva.org>
> 
> When presented with large amounts of memory being hotplugged
> (in my test case, ~890GB), the call to flush_dcache_range takes
> a while (~50 seconds), triggering RCU stalls.
> 
> This patch breaks up the call into 16GB chunks, calling
> cond_resched() in between to allow the scheduler to run.

Is 16GB small enough? If 890GB takes 50s, 16GB still takes about 1s.
I'd use 1GB chunks to remain below 100ms.
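
(Rough arithmetic, assuming the flush time scales linearly: 50s / 890GB is about 56ms per GB, so a 16GB chunk takes roughly 0.9s, while a 1GB chunk stays around 56ms, safely under 100ms.)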

> 
> Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> ---
>   arch/powerpc/mm/mem.c | 16 ++++++++++++++--
>   1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index 5400da87a804..fb0d5e9aa11b 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -104,11 +104,14 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
>   	return -ENODEV;
>   }
>   
> +#define FLUSH_CHUNK_SIZE (16ull * 1024ull * 1024ull * 1024ull)

Can we use SZ_16GB?

> +
>   int __ref arch_add_memory(int nid, u64 start, u64 size,
>   			struct mhp_restrictions *restrictions)
>   {
>   	unsigned long start_pfn = start >> PAGE_SHIFT;
>   	unsigned long nr_pages = size >> PAGE_SHIFT;
> +	unsigned long i;
>   	int rc;
>   
>   	resize_hpt_for_hotplug(memblock_phys_mem_size());
> @@ -120,7 +123,11 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
>   			start, start + size, rc);
>   		return -EFAULT;
>   	}
> -	flush_dcache_range(start, start + size);
> +
> +	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> +		flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE));

Isn't the line a bit long? (I have not checked.)

> +		cond_resched();
> +	}
>   
>   	return __add_pages(nid, start_pfn, nr_pages, restrictions);
>   }
> @@ -131,13 +138,18 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
>   	unsigned long start_pfn = start >> PAGE_SHIFT;
>   	unsigned long nr_pages = size >> PAGE_SHIFT;
>   	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
> +	unsigned long i;
>   	int ret;
>   
>   	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
>   
>   	/* Remove htab bolted mappings for this section of memory */
>   	start = (unsigned long)__va(start);
> -	flush_dcache_range(start, start + size);
> +	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> +		flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE));
> +		cond_resched();
> +	}
> +
>   	ret = remove_section_mapping(start, start + size);
>   	WARN_ON_ONCE(ret);
>   
> 

Christophe

Alastair D'Silva Aug. 16, 2019, 1:45 a.m. UTC | #3
On Thu, 2019-08-15 at 09:36 +0200, Christophe Leroy wrote:
> 
> On 15/08/2019 at 06:10, Alastair D'Silva wrote:
> > From: Alastair D'Silva <alastair@d-silva.org>
> > 
> > When presented with large amounts of memory being hotplugged
> > (in my test case, ~890GB), the call to flush_dcache_range takes
> > a while (~50 seconds), triggering RCU stalls.
> > 
> > This patch breaks up the call into 16GB chunks, calling
> > cond_resched() in between to allow the scheduler to run.
> 
> Is 16GB small enough? If 890GB takes 50s, 16GB still takes about 1s.
> I'd use 1GB chunks to remain below 100ms.
> 
> > Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> > ---
> >   arch/powerpc/mm/mem.c | 16 ++++++++++++++--
> >   1 file changed, 14 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> > index 5400da87a804..fb0d5e9aa11b 100644
> > --- a/arch/powerpc/mm/mem.c
> > +++ b/arch/powerpc/mm/mem.c
> > @@ -104,11 +104,14 @@ int __weak remove_section_mapping(unsigned
> > long start, unsigned long end)
> >   	return -ENODEV;
> >   }
> >   
> > +#define FLUSH_CHUNK_SIZE (16ull * 1024ull * 1024ull * 1024ull)
> 
> Can we use SZ_16GB?
Sure, I'll go with 1GB as you recommended above.

> > +
> >   int __ref arch_add_memory(int nid, u64 start, u64 size,
> >   			struct mhp_restrictions *restrictions)
> >   {
> >   	unsigned long start_pfn = start >> PAGE_SHIFT;
> >   	unsigned long nr_pages = size >> PAGE_SHIFT;
> > +	unsigned long i;
> >   	int rc;
> >   
> >   	resize_hpt_for_hotplug(memblock_phys_mem_size());
> > @@ -120,7 +123,11 @@ int __ref arch_add_memory(int nid, u64 start,
> > u64 size,
> >   			start, start + size, rc);
> >   		return -EFAULT;
> >   	}
> > -	flush_dcache_range(start, start + size);
> > +
> > +	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> > +		flush_dcache_range(start + i, min(start + size, start +
> > i + FLUSH_CHUNK_SIZE));
> 
> Isn't the line a bit long? (I have not checked.)
> 
> > +		cond_resched();
> > +	}
> >   
> >   	return __add_pages(nid, start_pfn, nr_pages, restrictions);
> >   }
> > @@ -131,13 +138,18 @@ void __ref arch_remove_memory(int nid, u64
> > start, u64 size,
> >   	unsigned long start_pfn = start >> PAGE_SHIFT;
> >   	unsigned long nr_pages = size >> PAGE_SHIFT;
> >   	struct page *page = pfn_to_page(start_pfn) +
> > vmem_altmap_offset(altmap);
> > +	unsigned long i;
> >   	int ret;
> >   
> >   	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
> >   
> >   	/* Remove htab bolted mappings for this section of memory */
> >   	start = (unsigned long)__va(start);
> > -	flush_dcache_range(start, start + size);
> > +	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> > +		flush_dcache_range(start + i, min(start + size, start +
> > i + FLUSH_CHUNK_SIZE));
> > +		cond_resched();
> > +	}
> > +
> >   	ret = remove_section_mapping(start, start + size);
> >   	WARN_ON_ONCE(ret);
> >   
> > 
> 
> Christophe
> 

Patch

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5400da87a804..fb0d5e9aa11b 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -104,11 +104,14 @@  int __weak remove_section_mapping(unsigned long start, unsigned long end)
 	return -ENODEV;
 }
 
+#define FLUSH_CHUNK_SIZE (16ull * 1024ull * 1024ull * 1024ull)
+
 int __ref arch_add_memory(int nid, u64 start, u64 size,
 			struct mhp_restrictions *restrictions)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
+	unsigned long i;
 	int rc;
 
 	resize_hpt_for_hotplug(memblock_phys_mem_size());
@@ -120,7 +123,11 @@  int __ref arch_add_memory(int nid, u64 start, u64 size,
 			start, start + size, rc);
 		return -EFAULT;
 	}
-	flush_dcache_range(start, start + size);
+
+	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
+		flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE));
+		cond_resched();
+	}
 
 	return __add_pages(nid, start_pfn, nr_pages, restrictions);
 }
@@ -131,13 +138,18 @@  void __ref arch_remove_memory(int nid, u64 start, u64 size,
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
+	unsigned long i;
 	int ret;
 
 	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
 
 	/* Remove htab bolted mappings for this section of memory */
 	start = (unsigned long)__va(start);
-	flush_dcache_range(start, start + size);
+	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
+		flush_dcache_range(start + i, min(start + size, start + i + FLUSH_CHUNK_SIZE));
+		cond_resched();
+	}
+
 	ret = remove_section_mapping(start, start + size);
 	WARN_ON_ONCE(ret);
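
Not part of the posted patch: a minimal sketch of how the loop might look after the review feedback above, using 1GB chunks via the existing SZ_1G constant from include/linux/sizes.h and moving the min() onto its own line to address the line-length concern:

	#include <linux/sizes.h>

	/* 1GB per flush keeps each chunk around 56ms by the estimate above */
	#define FLUSH_CHUNK_SIZE	SZ_1G

	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
		u64 end = min(start + size,
			      start + i + FLUSH_CHUNK_SIZE);

		flush_dcache_range(start + i, end);
		cond_resched();
	}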