[v2] powerpc/perf/hv-24x7: use kmem_cache instead of aligned stack allocations

Message ID 1400798673-27522-1-git-send-email-cody@linux.vnet.ibm.com (mailing list archive)
State Accepted
Delegated to: Michael Ellerman

Commit Message

Cody P Schafer May 22, 2014, 10:44 p.m. UTC
Ian pointed out that the use of __aligned(4096) causes rather large
stack consumption in single_24x7_request(), so use the kmem_cache
hv_page_cache (which we've already got set up for other allocations)
instead of allocating locally.

Reported-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
---
In v2:
  - remove duplicate exit path


 arch/powerpc/perf/hv-24x7.c | 48 +++++++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 15 deletions(-)
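
For context, the general shape of the change (moving a page-sized,
page-aligned buffer off the kernel stack and onto a pre-built
kmem_cache) looks roughly like the sketch below. This is illustrative
only: use_buffer() and the page_cache argument are placeholders, not
names taken from the driver.

#include <linux/errno.h>
#include <linux/slab.h>

int use_buffer(void *buf);	/* placeholder consumer, not a real API */

/* Before: a page-sized, page-aligned local eats ~4 KiB of stack. */
static int do_request_on_stack(void)
{
        struct { char data[4096]; } __aligned(4096) buf = {};

        return use_buffer(&buf);
}

/* After: borrow a page-sized object from a kmem_cache instead. */
static int do_request_from_cache(struct kmem_cache *page_cache)
{
        struct { char data[4096]; } *buf;
        int ret;

        buf = kmem_cache_zalloc(page_cache, GFP_USER);
        if (!buf)
                return -ENOMEM;

        ret = use_buffer(buf);
        kmem_cache_free(page_cache, buf);
        return ret;
}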

Comments

Stephen Rothwell May 22, 2014, 11:49 p.m. UTC | #1
Hi Cody,

On Thu, 22 May 2014 15:44:25 -0700 Cody P Schafer <cody@linux.vnet.ibm.com> wrote:
>
>  	if (ret) {
>  		if (success_expected)
>  			pr_err_ratelimited("hcall failed: %d %#x %#x %d => 0x%lx (%ld) detail=0x%x failing ix=%x\n",
>  					domain, offset, ix, lpar,
>  					ret, ret,
> -					result_buffer.buf.detailed_rc,
> -					result_buffer.buf.failing_request_ix);
> -		return ret;
> +					result_buffer->buf.detailed_rc,
> +					result_buffer->buf.failing_request_ix);
> +		goto out_hcall;
>  	}
>  
> -	*res = be64_to_cpu(result_buffer.result);
> +	*res = be64_to_cpu(result_buffer->result);

not a biggie, but this last bit could be (remove the goto out_hcall and
the label and then)

	} else {
		*res = be64_to_cpu(result_buffer->result);
	}

> +out_hcall:
> +	kfree(result_buffer);
> +out_resb:
> +	kfree(request_buffer);
> +out_reqb:
>  	return ret;
>  }
>  

otherwise looks good to me.
Cody P Schafer May 22, 2014, 11:54 p.m. UTC | #2
On 05/22/2014 04:49 PM, Stephen Rothwell wrote:
> Hi Cody,
>
> On Thu, 22 May 2014 15:44:25 -0700 Cody P Schafer <cody@linux.vnet.ibm.com> wrote:
>>
>>   	if (ret) {
>>   		if (success_expected)
>>   			pr_err_ratelimited("hcall failed: %d %#x %#x %d => 0x%lx (%ld) detail=0x%x failing ix=%x\n",
>>   					domain, offset, ix, lpar,
>>   					ret, ret,
>> -					result_buffer.buf.detailed_rc,
>> -					result_buffer.buf.failing_request_ix);
>> -		return ret;
>> +					result_buffer->buf.detailed_rc,
>> +					result_buffer->buf.failing_request_ix);
>> +		goto out_hcall;
>>   	}
>>
>> -	*res = be64_to_cpu(result_buffer.result);
>> +	*res = be64_to_cpu(result_buffer->result);
>
> not a biggie, but this last bit could be (remove the goto out_hcall and
> the label and then)
>
> 	} else {
> 		*res = be64_to_cpu(result_buffer->result);
> 	}
>

I've got a slight preference toward keeping it as is, which lets all of 
the non-error path code stay outside of if/else blocks (and the error 
handling is kept ever so slightly more consistent).

>> +out_hcall:
>> +	kfree(result_buffer);
>> +out_resb:
>> +	kfree(request_buffer);
>> +out_reqb:
>>   	return ret;
>>   }
>>
>
> otherwise looks good to me.
>
Stephen Rothwell May 23, 2014, 12:20 a.m. UTC | #3
Hi Cody,

On Thu, 22 May 2014 16:54:07 -0700 Cody P Schafer <cody@linux.vnet.ibm.com> wrote:
>
> I've got a slight preference toward keeping it as is, which lets all of 
> the non-error path code stay outside of if/else blocks (and the error 
> handling is kept ever so slightly more consistent).

No problem.
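
For readers skimming the thread, here are compressed skeletons of the
two shapes discussed above: the goto-based cleanup kept in v2, and the
if/else variant suggested in review. These fragments are illustrative
only; the struct and the error reporting are stripped down, and
resb_skel and the function names are made up.

#include <linux/slab.h>		/* kfree */
#include <linux/types.h>	/* __be64, u64 */
#include <asm/byteorder.h>	/* be64_to_cpu */

struct resb_skel { __be64 result; };	/* stand-in for the real result buffer */

/* Shape kept in v2: the success path stays unindented and every exit
 * funnels through the same cleanup label. */
static unsigned long finish_with_goto(struct resb_skel *rb, u64 *res,
                                      unsigned long ret)
{
        if (ret)
                goto out_hcall;		/* error already reported */

        *res = be64_to_cpu(rb->result);
out_hcall:
        kfree(rb);
        return ret;
}

/* Shape suggested in review: fold the result read into an else and
 * drop the out_hcall label. */
static unsigned long finish_with_else(struct resb_skel *rb, u64 *res,
                                      unsigned long ret)
{
        if (ret) {
                /* rate-limited error report would go here */
        } else {
                *res = be64_to_cpu(rb->result);
        }
        kfree(rb);
        return ret;
}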

Patch

diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index e0766b8..998863b 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -294,7 +294,7 @@  static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
 					 u16 lpar, u64 *res,
 					 bool success_expected)
 {
-	unsigned long ret;
+	unsigned long ret = -ENOMEM;
 
 	/*
 	 * request_buffer and result_buffer are not required to be 4k aligned,
@@ -304,7 +304,27 @@  static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
 	struct reqb {
 		struct hv_24x7_request_buffer buf;
 		struct hv_24x7_request req;
-	} __packed __aligned(4096) request_buffer = {
+	} __packed *request_buffer;
+	struct resb {
+		struct hv_24x7_data_result_buffer buf;
+		struct hv_24x7_result res;
+		struct hv_24x7_result_element elem;
+		__be64 result;
+	} __packed *result_buffer;
+
+	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
+	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
+
+	request_buffer = kmem_cache_alloc(hv_page_cache, GFP_USER);
+
+	if (!request_buffer)
+		goto out_reqb;
+
+	result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
+	if (!result_buffer)
+		goto out_resb;
+
+	*request_buffer = (struct reqb) {
 		.buf = {
 			.interface_version = HV_24X7_IF_VERSION_CURRENT,
 			.num_requests = 1,
@@ -320,28 +340,26 @@  static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
 		}
 	};
 
-	struct resb {
-		struct hv_24x7_data_result_buffer buf;
-		struct hv_24x7_result res;
-		struct hv_24x7_result_element elem;
-		__be64 result;
-	} __packed __aligned(4096) result_buffer = {};
-
 	ret = plpar_hcall_norets(H_GET_24X7_DATA,
-			virt_to_phys(&request_buffer), sizeof(request_buffer),
-			virt_to_phys(&result_buffer),  sizeof(result_buffer));
+			virt_to_phys(request_buffer), sizeof(*request_buffer),
+			virt_to_phys(result_buffer),  sizeof(*result_buffer));
 
 	if (ret) {
 		if (success_expected)
 			pr_err_ratelimited("hcall failed: %d %#x %#x %d => 0x%lx (%ld) detail=0x%x failing ix=%x\n",
 					domain, offset, ix, lpar,
 					ret, ret,
-					result_buffer.buf.detailed_rc,
-					result_buffer.buf.failing_request_ix);
-		return ret;
+					result_buffer->buf.detailed_rc,
+					result_buffer->buf.failing_request_ix);
+		goto out_hcall;
 	}
 
-	*res = be64_to_cpu(result_buffer.result);
+	*res = be64_to_cpu(result_buffer->result);
+out_hcall:
+	kfree(result_buffer);
+out_resb:
+	kfree(request_buffer);
+out_reqb:
 	return ret;
 }
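
For completeness: the patch relies on hv_page_cache already being set
up elsewhere in hv-24x7.c. A page-aligned cache of that kind is
typically created roughly as sketched below; the cache name string,
function name, and error handling here are illustrative assumptions,
not necessarily what the real driver uses.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>

static struct kmem_cache *hv_page_cache;

static int __init hv24x7_page_cache_init(void)
{
        /*
         * 4096-byte objects with 4096-byte alignment, so virt_to_phys()
         * of an allocation yields a page-aligned address that can be
         * handed straight to the hypervisor.
         */
        hv_page_cache = kmem_cache_create("hv_24x7_page", 4096, 4096, 0, NULL);
        if (!hv_page_cache)
                return -ENOMEM;

        return 0;
}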