diff mbox series

[v2,4/6] powerpc: Chunk calls to flush_dcache_range in arch_*_memory

Message ID 20190903052407.16638-5-alastair@au1.ibm.com (mailing list archive)
State Changes Requested
Headers show
Series powerpc: convert cache asm to C | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch warning Failed to apply on branch next (c317052c95bef1f977b023158e5aa929215f443d)
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch

Commit Message

Alastair D'Silva Sept. 3, 2019, 5:23 a.m. UTC
From: Alastair D'Silva <alastair@d-silva.org>

When presented with large amounts of memory being hotplugged
(in my test case, ~890GB), the call to flush_dcache_range takes
a while (~50 seconds), triggering RCU stalls.

This patch breaks up the call into 1GB chunks, calling
cond_resched() inbetween to allow the scheduler to run.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
---
 arch/powerpc/mm/mem.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

Comments

Christophe Leroy Sept. 3, 2019, 6:19 a.m. UTC | #1
Le 03/09/2019 à 07:23, Alastair D'Silva a écrit :
> From: Alastair D'Silva <alastair@d-silva.org>
> 
> When presented with large amounts of memory being hotplugged
> (in my test case, ~890GB), the call to flush_dcache_range takes
> a while (~50 seconds), triggering RCU stalls.
> 
> This patch breaks up the call into 1GB chunks, calling
> cond_resched() inbetween to allow the scheduler to run.
> 
> Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> ---
>   arch/powerpc/mm/mem.c | 18 ++++++++++++++++--
>   1 file changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index cd540123874d..854aaea2c6ae 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -104,11 +104,14 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
>   	return -ENODEV;
>   }
>   
> +#define FLUSH_CHUNK_SIZE SZ_1G

Maybe the name is a bit long for a local define. See if we could reduce 
code line splits below by shortening this name.

> +
>   int __ref arch_add_memory(int nid, u64 start, u64 size,
>   			struct mhp_restrictions *restrictions)
>   {
>   	unsigned long start_pfn = start >> PAGE_SHIFT;
>   	unsigned long nr_pages = size >> PAGE_SHIFT;
> +	u64 i;
>   	int rc;
>   
>   	resize_hpt_for_hotplug(memblock_phys_mem_size());
> @@ -120,7 +123,12 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
>   			start, start + size, rc);
>   		return -EFAULT;
>   	}
> -	flush_dcache_range(start, start + size);
> +
> +	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> +		flush_dcache_range(start + i,
> +				   min(start + size, start + i + FLUSH_CHUNK_SIZE));

My eyes don't like it.

What about
	for (; i < size; i += FLUSH_CHUNK_SIZE) {
		int len = min(size - i, FLUSH_CHUNK_SIZE);

		flush_dcache_range(start + i, start + i + len);
		cond_resched();
	}

or

	end = start + size;
	for (; start < end; start += FLUSH_CHUNK_SIZE, size -= FLUSH_CHUNK_SIZE) {
		int len = min(size, FLUSH_CHUNK_SIZE);

		flush_dcache_range(start, start + len);
		cond_resched();
	}

> +		cond_resched();
> +	}
>   
>   	return __add_pages(nid, start_pfn, nr_pages, restrictions);
>   }
> @@ -131,13 +139,19 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
>   	unsigned long start_pfn = start >> PAGE_SHIFT;
>   	unsigned long nr_pages = size >> PAGE_SHIFT;
>   	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
> +	u64 i;
>   	int ret;
>   
>   	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
>   
>   	/* Remove htab bolted mappings for this section of memory */
>   	start = (unsigned long)__va(start);
> -	flush_dcache_range(start, start + size);
> +	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> +		flush_dcache_range(start + i,
> +				   min(start + size, start + i + FLUSH_CHUNK_SIZE));
> +		cond_resched();
> +	}
> +

This piece of code looks pretty similar to the one before. Can we 
refactor into a small helper ?

>   	ret = remove_section_mapping(start, start + size);
>   	WARN_ON_ONCE(ret);
>   
> 

Christophe
Alastair D'Silva Sept. 3, 2019, 6:25 a.m. UTC | #2
On Tue, 2019-09-03 at 08:19 +0200, Christophe Leroy wrote:
> 
> Le 03/09/2019 à 07:23, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <alastair@d-silva.org>
> > 
> > When presented with large amounts of memory being hotplugged
> > (in my test case, ~890GB), the call to flush_dcache_range takes
> > a while (~50 seconds), triggering RCU stalls.
> > 
> > This patch breaks up the call into 1GB chunks, calling
> > cond_resched() inbetween to allow the scheduler to run.
> > 
> > Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> > ---
> >   arch/powerpc/mm/mem.c | 18 ++++++++++++++++--
> >   1 file changed, 16 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> > index cd540123874d..854aaea2c6ae 100644
> > --- a/arch/powerpc/mm/mem.c
> > +++ b/arch/powerpc/mm/mem.c
> > @@ -104,11 +104,14 @@ int __weak remove_section_mapping(unsigned
> > long start, unsigned long end)
> >   	return -ENODEV;
> >   }
> >   
> > +#define FLUSH_CHUNK_SIZE SZ_1G
> 
> Maybe the name is a bit long for a local define. See if we could
> reduce 
> code line splits below by shortening this name.
> 
> > +
> >   int __ref arch_add_memory(int nid, u64 start, u64 size,
> >   			struct mhp_restrictions *restrictions)
> >   {
> >   	unsigned long start_pfn = start >> PAGE_SHIFT;
> >   	unsigned long nr_pages = size >> PAGE_SHIFT;
> > +	u64 i;
> >   	int rc;
> >   
> >   	resize_hpt_for_hotplug(memblock_phys_mem_size());
> > @@ -120,7 +123,12 @@ int __ref arch_add_memory(int nid, u64 start,
> > u64 size,
> >   			start, start + size, rc);
> >   		return -EFAULT;
> >   	}
> > -	flush_dcache_range(start, start + size);
> > +
> > +	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> > +		flush_dcache_range(start + i,
> > +				   min(start + size, start + i +
> > FLUSH_CHUNK_SIZE));
> 
> My eyes don't like it.
> 
> What about
> 	for (; i < size; i += FLUSH_CHUNK_SIZE) {
> 		int len = min(size - i, FLUSH_CHUNK_SIZE);
> 
> 		flush_dcache_range(start + i, start + i + len);
> 		cond_resched();
> 	}
> 
> or
> 
> 	end = start + size;
> 	for (; start < end; start += FLUSH_CHUNK_SIZE, size -=
> FLUSH_CHUNK_SIZE) {
> 		int len = min(size, FLUSH_CHUNK_SIZE);
> 
> 		flush_dcache_range(start, start + len);
> 		cond_resched();
> 	}
> 
> > +		cond_resched();
> > +	}
> >   
> >   	return __add_pages(nid, start_pfn, nr_pages, restrictions);
> >   }
> > @@ -131,13 +139,19 @@ void __ref arch_remove_memory(int nid, u64
> > start, u64 size,
> >   	unsigned long start_pfn = start >> PAGE_SHIFT;
> >   	unsigned long nr_pages = size >> PAGE_SHIFT;
> >   	struct page *page = pfn_to_page(start_pfn) +
> > vmem_altmap_offset(altmap);
> > +	u64 i;
> >   	int ret;
> >   
> >   	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
> >   
> >   	/* Remove htab bolted mappings for this section of memory */
> >   	start = (unsigned long)__va(start);
> > -	flush_dcache_range(start, start + size);
> > +	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
> > +		flush_dcache_range(start + i,
> > +				   min(start + size, start + i +
> > FLUSH_CHUNK_SIZE));
> > +		cond_resched();
> > +	}
> > +
> 
> This piece of code looks pretty similar to the one before. Can we 
> refactor into a small helper ?
> 

Not much point, it's removed in a subsequent patch.
Christophe Leroy Sept. 3, 2019, 6:51 a.m. UTC | #3
Le 03/09/2019 à 08:25, Alastair D'Silva a écrit :
> On Tue, 2019-09-03 at 08:19 +0200, Christophe Leroy wrote:
>>
>> Le 03/09/2019 à 07:23, Alastair D'Silva a écrit :
>>> From: Alastair D'Silva <alastair@d-silva.org>
>>>
>>> When presented with large amounts of memory being hotplugged
>>> (in my test case, ~890GB), the call to flush_dcache_range takes
>>> a while (~50 seconds), triggering RCU stalls.
>>>
>>> This patch breaks up the call into 1GB chunks, calling
>>> cond_resched() inbetween to allow the scheduler to run.
>>>
>>> Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
>>> ---
>>>    arch/powerpc/mm/mem.c | 18 ++++++++++++++++--
>>>    1 file changed, 16 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
>>> index cd540123874d..854aaea2c6ae 100644
>>> --- a/arch/powerpc/mm/mem.c
>>> +++ b/arch/powerpc/mm/mem.c
>>> @@ -104,11 +104,14 @@ int __weak remove_section_mapping(unsigned
>>> long start, unsigned long end)
>>>    	return -ENODEV;
>>>    }
>>>    
>>> +#define FLUSH_CHUNK_SIZE SZ_1G
>>
>> Maybe the name is a bit long for a local define. See if we could
>> reduce
>> code line splits below by shortening this name.
>>
>>> +
>>>    int __ref arch_add_memory(int nid, u64 start, u64 size,
>>>    			struct mhp_restrictions *restrictions)
>>>    {
>>>    	unsigned long start_pfn = start >> PAGE_SHIFT;
>>>    	unsigned long nr_pages = size >> PAGE_SHIFT;
>>> +	u64 i;
>>>    	int rc;
>>>    
>>>    	resize_hpt_for_hotplug(memblock_phys_mem_size());
>>> @@ -120,7 +123,12 @@ int __ref arch_add_memory(int nid, u64 start,
>>> u64 size,
>>>    			start, start + size, rc);
>>>    		return -EFAULT;
>>>    	}
>>> -	flush_dcache_range(start, start + size);
>>> +
>>> +	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
>>> +		flush_dcache_range(start + i,
>>> +				   min(start + size, start + i +
>>> FLUSH_CHUNK_SIZE));
>>
>> My eyes don't like it.
>>
>> What about
>> 	for (; i < size; i += FLUSH_CHUNK_SIZE) {
>> 		int len = min(size - i, FLUSH_CHUNK_SIZE);
>>
>> 		flush_dcache_range(start + i, start + i + len);
>> 		cond_resched();
>> 	}
>>
>> or
>>
>> 	end = start + size;
>> 	for (; start < end; start += FLUSH_CHUNK_SIZE, size -=
>> FLUSH_CHUNK_SIZE) {
>> 		int len = min(size, FLUSH_CHUNK_SIZE);
>>
>> 		flush_dcache_range(start, start + len);
>> 		cond_resched();
>> 	}
>>
>>> +		cond_resched();
>>> +	}
>>>    
>>>    	return __add_pages(nid, start_pfn, nr_pages, restrictions);
>>>    }
>>> @@ -131,13 +139,19 @@ void __ref arch_remove_memory(int nid, u64
>>> start, u64 size,
>>>    	unsigned long start_pfn = start >> PAGE_SHIFT;
>>>    	unsigned long nr_pages = size >> PAGE_SHIFT;
>>>    	struct page *page = pfn_to_page(start_pfn) +
>>> vmem_altmap_offset(altmap);
>>> +	u64 i;
>>>    	int ret;
>>>    
>>>    	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
>>>    
>>>    	/* Remove htab bolted mappings for this section of memory */
>>>    	start = (unsigned long)__va(start);
>>> -	flush_dcache_range(start, start + size);
>>> +	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
>>> +		flush_dcache_range(start + i,
>>> +				   min(start + size, start + i +
>>> FLUSH_CHUNK_SIZE));
>>> +		cond_resched();
>>> +	}
>>> +
>>
>> This piece of code looks pretty similar to the one before. Can we
>> refactor into a small helper ?
>>
> 
> Not much point, it's removed in a subsequent patch.
> 

But you tell me that you leave to people the opportunity to not apply 
that subsequent patch, and that's the reason you didn't put that patch 
before this one. In that case adding a helper is worth it.

Christophe
Alastair D'Silva Sept. 4, 2019, 4:11 a.m. UTC | #4
On Tue, 2019-09-03 at 08:51 +0200, Christophe Leroy wrote:
<snip>

> > > This piece of code looks pretty similar to the one before. Can we
> > > refactor into a small helper ?
> > > 
> > 
> > Not much point, it's removed in a subsequent patch.
> > 
> 
> But you tell me that you leave to people the opportunity to not
> apply 
> that subsequent patch, and that's the reason you didn't put that
> patch 
> before this one. In that case adding a helper is worth it.
> 
> Christophe

I factored it out anyway, since it made the code much nicer to read.
diff mbox series

Patch

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index cd540123874d..854aaea2c6ae 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -104,11 +104,14 @@  int __weak remove_section_mapping(unsigned long start, unsigned long end)
 	return -ENODEV;
 }
 
+#define FLUSH_CHUNK_SIZE SZ_1G
+
 int __ref arch_add_memory(int nid, u64 start, u64 size,
 			struct mhp_restrictions *restrictions)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
+	u64 i;
 	int rc;
 
 	resize_hpt_for_hotplug(memblock_phys_mem_size());
@@ -120,7 +123,12 @@  int __ref arch_add_memory(int nid, u64 start, u64 size,
 			start, start + size, rc);
 		return -EFAULT;
 	}
-	flush_dcache_range(start, start + size);
+
+	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
+		flush_dcache_range(start + i,
+				   min(start + size, start + i + FLUSH_CHUNK_SIZE));
+		cond_resched();
+	}
 
 	return __add_pages(nid, start_pfn, nr_pages, restrictions);
 }
@@ -131,13 +139,19 @@  void __ref arch_remove_memory(int nid, u64 start, u64 size,
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
+	u64 i;
 	int ret;
 
 	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
 
 	/* Remove htab bolted mappings for this section of memory */
 	start = (unsigned long)__va(start);
-	flush_dcache_range(start, start + size);
+	for (i = 0; i < size; i += FLUSH_CHUNK_SIZE) {
+		flush_dcache_range(start + i,
+				   min(start + size, start + i + FLUSH_CHUNK_SIZE));
+		cond_resched();
+	}
+
 	ret = remove_section_mapping(start, start + size);
 	WARN_ON_ONCE(ret);