Message ID | 20141210070637.GA27377@us.ibm.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Michael Ellerman |
Headers | show |
On Tue, 2014-12-09 at 23:06 -0800, Sukadev Bhattiprolu wrote:
> From 470c16c8955672103a9529c78dffbb239e9e27b8 Mon Sep 17 00:00:00 2001
> From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
> Date: Tue, 9 Dec 2014 22:17:46 -0500
> Subject: [PATCH 1/2] perf/poweprc/hv-24x7: Use per-cpu page buffer
>
> diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
> index dba3408..18e1f49 100644
> --- a/arch/powerpc/perf/hv-24x7.c
> +++ b/arch/powerpc/perf/hv-24x7.c
> @@ -217,11 +217,14 @@ static bool is_physical_domain(int domain)
>  		domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE;
>  }
>
> +DEFINE_PER_CPU(char, hv_24x7_reqb[4096]);
> +DEFINE_PER_CPU(char, hv_24x7_resb[4096]);

Do we need it to be 4K aligned also? I would guess so.

Rather than declaring these as char arrays and then casting below, can you pull
the struct definitions up and then declare the per cpu variables with the
proper type?

>  static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
>  					 u16 lpar, u64 *res,
>  					 bool success_expected)
>  {
> -	unsigned long ret = -ENOMEM;
> +	unsigned long ret;
>
>  	/*
>  	 * request_buffer and result_buffer are not required to be 4k aligned,
> @@ -243,13 +246,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
>  	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
>  	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
>
> -	request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
> -	if (!request_buffer)
> -		goto out;
> +	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
> +	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
>
> -	result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
> -	if (!result_buffer)
> -		goto out_free_request_buffer;
> +	memset(request_buffer, 0, 4096);
> +	memset(result_buffer, 0, 4096);

Do we have to memset them? That's not going to speed things up.

cheers
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index dba3408..18e1f49 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -217,11 +217,14 @@ static bool is_physical_domain(int domain) domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE; } +DEFINE_PER_CPU(char, hv_24x7_reqb[4096]); +DEFINE_PER_CPU(char, hv_24x7_resb[4096]); + static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, u16 lpar, u64 *res, bool success_expected) { - unsigned long ret = -ENOMEM; + unsigned long ret; /* * request_buffer and result_buffer are not required to be 4k aligned, @@ -243,13 +246,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, BUILD_BUG_ON(sizeof(*request_buffer) > 4096); BUILD_BUG_ON(sizeof(*result_buffer) > 4096); - request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); - if (!request_buffer) - goto out; + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + result_buffer = (void *)get_cpu_var(hv_24x7_resb); - result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); - if (!result_buffer) - goto out_free_request_buffer; + memset(request_buffer, 0, 4096); + memset(result_buffer, 0, 4096); *request_buffer = (struct reqb) { .buf = { @@ -278,15 +279,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, domain, offset, ix, lpar, ret, ret, result_buffer->buf.detailed_rc, result_buffer->buf.failing_request_ix); - goto out_free_result_buffer; + goto out; } *res = be64_to_cpu(result_buffer->result); -out_free_result_buffer: - kfree(result_buffer); -out_free_request_buffer: - kfree(request_buffer); out: return ret; }