Message ID | 1433763511-5270-10-git-send-email-khandual@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
On 06/08/2015 05:08 PM, Anshuman Khandual wrote:
> From: "khandual@linux.vnet.ibm.com" <khandual@linux.vnet.ibm.com>
This should be "Anshuman Khandual <khandual@linux.vnet.ibm.com>" and it happened
to a couple of other patches in this series as well. I believe it got messed up in
a test machine, will fix it next time around.
Hi, On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote: > diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c > new file mode 100644 > index 0000000..13e6b72 > --- /dev/null > +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c > @@ -0,0 +1,513 @@ > +/* > + * BHRB filter test (HW & SW) > + * > + * Copyright 2015 Anshuman Khandual, IBM Corporation. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ This should also be gpl2 only. > +#include <unistd.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <signal.h> > +#include <poll.h> > +#include <sys/shm.h> > +#include <sys/types.h> > +#include <sys/wait.h> > +#include <sys/mman.h> > + > +#include "bhrb_filters.h" > +#include "utils.h" > +#include "../event.h" > +#include "../lib.h" > + > +/* Fetched address counts */ > +#define ALL_MAX 32 > +#define CALL_MAX 12 > +#define RET_MAX 10 > +#define COND_MAX 8 > +#define IND_MAX 4 > + > +/* Test tunables */ > +#define LOOP_COUNT 10 > +#define SAMPLE_PERIOD 10000 > + > +static int branch_sample_type; > +static int branch_test_set[27] = { Do you need to explicitly provide the count here? > + PERF_SAMPLE_BRANCH_ANY_CALL, /* Single filters */ > + PERF_SAMPLE_BRANCH_ANY_RETURN, > + PERF_SAMPLE_BRANCH_COND, > + PERF_SAMPLE_BRANCH_IND_CALL, > + PERF_SAMPLE_BRANCH_ANY, > + > + PERF_SAMPLE_BRANCH_ANY_CALL | /* Tripple filters */ s/Tripple/Triple/ > + PERF_SAMPLE_BRANCH_ANY_RETURN | > + PERF_SAMPLE_BRANCH_COND, > + > + > +static void *ring_buffer_mask(struct ring_buffer *r, void *p) Is this actually returning a mask? It looks more like it's calculating an offset, and that seems to be how you use it below. 
> +{ > + unsigned long l = (unsigned long)p; > + > + return (void *)(r->ring_base + ((l - r->ring_base) & r->mask)); > +} That's a lot of casts, especially when you then load it into a int64_t pointer below... > + > +static void dump_sample(struct perf_event_header *hdr, struct ring_buffer *r) > +{ > + unsigned long from, to, flag; > + int i, nr; > + int64_t *v; > + > + /* NR Branches */ > + v = ring_buffer_mask(r, hdr + 1); ...here. (and everywhere else I can see that you're using the ring_buffer_mask function) > + > + nr = *v; You are dereferencing a int64_t pointer into an int. Should nr be an int64_t? Or should v be a different pointer type? > + > + /* Branches */ > + for (i = 0; i < nr; i++) { > + v = ring_buffer_mask(r, v + 1); > + from = *v; Now you're dereferencing an *int64_t into an unsigned long. > + > + v = ring_buffer_mask(r, v + 1); > + to = *v; > + > + v = ring_buffer_mask(r, v + 1); > + flag = *v; > + > + if (!check_branch(from, to)) { > + has_failed = true; > + printf("[Filter: %d] From: %lx To: %lx Flags: %lx\n", > + branch_sample_type, from, to, flag); > + } > + } > +} > + > +static void read_ring_buffer(struct event *e) > +{ > + struct ring_buffer *r = &e->ring_buffer; > + struct perf_event_header *hdr; > + int old, head; Why not tail and head? > + > + head = r->page->data_head & r->mask; > + > + asm volatile ("sync": : :"memory"); > + > + old = r->page->data_tail & r->mask; > + Can you explain the logic of syncing between reading head and tail? Is there an expectation that head is not likely to change? As a more general comment, what is sync trying to achieve? If you're trying to synchronise something, what's the sync actually achieving? Is there a corresponding memory barrier somewhere else? What race conditions are you trying to guard against and does this actually guard against them? 
> + while (old != head) { > + hdr = (struct perf_event_header *)(r->ring_base + old); > + > + if ((old & r->mask) + hdr->size != > + ((old + hdr->size) & r->mask)) > + ++record_overlap; > + > + if (hdr->type == PERF_RECORD_SAMPLE) { > + ++record_sample; > + dump_sample(hdr, r); > + } > + > + if (hdr->type == PERF_RECORD_MMAP) > + ++record_mmap; > + > + if (hdr->type == PERF_RECORD_LOST) > + ++record_lost; > + > + if (hdr->type == PERF_RECORD_THROTTLE) > + ++record_throttle; > + > + if (hdr->type == PERF_RECORD_UNTHROTTLE) > + ++record_unthrottle; > + > + old = (old + hdr->size) & r->mask; > + } > + r->page->data_tail = old; What happens if data_tail moves while you're doing the loop? > +static int filter_test(void) > +{ > + struct pollfd pollfd; > + struct event ebhrb; > + pid_t pid; > + int ret, loop = 0; > + > + has_failed = false; > + pid = fork(); > + if (pid == -1) { > + perror("fork() failed"); > + return 1; > + } > + > + /* Run child */ > + if (pid == 0) { > + start_loop(); > + exit(0); > + } > + > + /* Prepare event */ > + event_init_opts(&ebhrb, PERF_COUNT_HW_INSTRUCTIONS, > + PERF_TYPE_HARDWARE, "insturctions"); Is instructions deliberately spelled incorrectly? 
> + ebhrb.attr.sample_type = PERF_SAMPLE_BRANCH_STACK; > + ebhrb.attr.disabled = 1; > + ebhrb.attr.mmap = 1; > + ebhrb.attr.mmap_data = 1; > + ebhrb.attr.sample_period = SAMPLE_PERIOD; > + ebhrb.attr.exclude_user = 0; > + ebhrb.attr.exclude_kernel = 1; > + ebhrb.attr.exclude_hv = 1; > + ebhrb.attr.branch_sample_type = branch_sample_type; > + > + /* Open event */ > + event_open_with_pid(&ebhrb, pid); > + > + /* Mmap ring buffer and enable event */ > + event_mmap(&ebhrb); > + FAIL_IF(event_enable(&ebhrb)); > + > + /* Prepare polling */ > + pollfd.fd = ebhrb.fd; > + pollfd.events = POLLIN; > + > + for (loop = 0; loop < LOOP_COUNT; loop++) { > + ret = poll(&pollfd, 1, -1); > + if (ret == -1) { > + perror("poll() failed"); > + return 1; > + } > + if (ret == 0) { > + perror("poll() timeout"); > + return 1; > + } > + read_ring_buffer(&ebhrb); > + if (has_failed) > + return 1; 1) I don't see anything that sets has_failed after it's initalised. 2) Should these error cases also explicitly terminate the child? Do you need something like this perhaps? if (ret == 0) { perror("poll() failed"); goto err; } ... } ... return 0; err: kill(pid, SIGTERM); // maybe even sigkill in the error case? return 1; > + } > + > + /* Disable and close event */ > + FAIL_IF(event_disable(&ebhrb)); > + event_close(&ebhrb); Again, do these need to be replicated in the error path? > + > + /* Terminate child */ > + kill(pid, SIGKILL); SIGKILL seems a bit harsh: wouldn't SIGTERM work? > + return 0; > +} > + > +static int bhrb_filters_test(void) > +{ > + int i; > + > + /* Fetch branches */ > + fetch_branches(); > + init_branch_stats(); > + init_perf_mmap_stats(); > + > + for (i = 0; i < sizeof(branch_test_set)/sizeof(int); i++) { > + branch_sample_type = branch_test_set[i]; > + if (filter_test()) Couldn't branch_sample_type be passed to filter_test as a parameter, rather than as a global? 
> + return 1; > + } > + > diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h > new file mode 100644 > index 0000000..072375a > --- /dev/null > +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h > @@ -0,0 +1,16 @@ > +/* > + * Copyright 2015 Anshuman Khandual, IBM Corporation. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. License again. (And in the other files in this patch.) > +_GLOBAL(start_loop) > +label: > + b label0 /* ANY */ > + blr /* ANY_RETURN */ > +label0: > + b label1 /* ANY */ > + > +label1: > + b label2 /* ANY */ > + > +label2: > + b label3 /* ANY */ > + > +label3: > + mflr LR_SAVE > + bl label4 /* ANY | ANY_CALL */ > + mtlr LR_SAVE > + b start_loop /* ANY */ > +label4: > + mflr LR_SAVE > + li 20, 12 > + cmpi 3, 20, 12 > + bcl 12, 4 * cr3+2, label5 /* ANY | ANY_CALL | COND */ > + li 20, 12 > + cmpi 4, 20, 20 > + bcl 12, 4 * cr4+0, label5 /* ANY | ANY_CALL | COND */ > + LOAD_ADDR(20, label5) > + mtctr 20 > + li 22, 10 > + cmpi 0, 22, 10 > + bcctrl 12, 4*cr0+2 /* ANY | NY_CALL | IND_CALL | COND */ > + LOAD_ADDR(20, label5) > + mtlr 20 > + li 20, 10 > + cmpi 0, 20, 10 > + bclrl 12, 4*cr0+2 /* ANY | ANY_CALL | IND_CALL | COND */ > + mtlr LR_SAVE > + blr /* ANY | ANY_RETURN */ > + > +label5: > + blr /* ANY | ANY_RETURN */ > + Could these labels have more descriptive names? Regards, Daniel
On 06/11/2015 07:39 AM, Daniel Axtens wrote: > Hi, > > On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote: >> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c >> new file mode 100644 >> index 0000000..13e6b72 >> --- /dev/null >> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c >> @@ -0,0 +1,513 @@ >> +/* >> + * BHRB filter test (HW & SW) >> + * >> + * Copyright 2015 Anshuman Khandual, IBM Corporation. >> + * >> + * This program is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU General Public License >> + * as published by the Free Software Foundation; either version >> + * 2 of the License, or (at your option) any later version. >> + */ > > This should also be gpl2 only. Why ? Any special reason ? I can see similar existing statements here in this file as well "powerpcC/primitives/load_unaligned_zeropad.c" >> +#include <unistd.h> >> +#include <stdio.h> >> +#include <stdlib.h> >> +#include <signal.h> >> +#include <poll.h> >> +#include <sys/shm.h> >> +#include <sys/types.h> >> +#include <sys/wait.h> >> +#include <sys/mman.h> >> + >> +#include "bhrb_filters.h" >> +#include "utils.h" >> +#include "../event.h" >> +#include "../lib.h" >> + >> +/* Fetched address counts */ >> +#define ALL_MAX 32 >> +#define CALL_MAX 12 >> +#define RET_MAX 10 >> +#define COND_MAX 8 >> +#define IND_MAX 4 >> + >> +/* Test tunables */ >> +#define LOOP_COUNT 10 >> +#define SAMPLE_PERIOD 10000 >> + >> +static int branch_sample_type; >> +static int branch_test_set[27] = { > Do you need to explicitly provide the count here? Not really, will fix it. >> + PERF_SAMPLE_BRANCH_ANY_CALL, /* Single filters */ >> + PERF_SAMPLE_BRANCH_ANY_RETURN, >> + PERF_SAMPLE_BRANCH_COND, >> + PERF_SAMPLE_BRANCH_IND_CALL, >> + PERF_SAMPLE_BRANCH_ANY, >> + > >> + PERF_SAMPLE_BRANCH_ANY_CALL | /* Tripple filters */ > s/Tripple/Triple/ Sure.will fix it. 
>> + PERF_SAMPLE_BRANCH_ANY_RETURN | >> + PERF_SAMPLE_BRANCH_COND, >> + > > >> + >> +static void *ring_buffer_mask(struct ring_buffer *r, void *p) > Is this actually returning a mask? It looks more like it's calculating > an offset, and that seems to be how you use it below. Yeah it does calculate an offset. Will change the function name to ring_buffer_offset instead. >> +{ >> + unsigned long l = (unsigned long)p; >> + >> + return (void *)(r->ring_base + ((l - r->ring_base) & r->mask)); >> +} > That's a lot of casts, especially when you then load it into a int64_t > pointer below... Will it cause any problem ? I can return here int64_t * instead to void * to match the receiving pointer. >> + >> +static void dump_sample(struct perf_event_header *hdr, struct ring_buffer *r) >> +{ >> + unsigned long from, to, flag; >> + int i, nr; >> + int64_t *v; >> + >> + /* NR Branches */ >> + v = ring_buffer_mask(r, hdr + 1); > ...here. (and everywhere else I can see that you're using the > ring_buffer_mask function) >> + >> + nr = *v; > You are dereferencing a int64_t pointer into an int. Should nr be an > int64_t? Or should v be a different pointer type? hmm, int64_t sounds good. > >> + >> + /* Branches */ >> + for (i = 0; i < nr; i++) { >> + v = ring_buffer_mask(r, v + 1); >> + from = *v; > Now you're dereferencing an *int64_t into an unsigned long. Just wanted to have 64 bits for that field. To achieve some uniformity will change all of v, nr, from, to, flags as int64_t type variables. Also will make ring_buffer_mask function return in64_t * pointer type instead. Will change ring_buffer_mask into ring_buffer_offset as well. 
>> + >> + v = ring_buffer_mask(r, v + 1); >> + to = *v; >> + >> + v = ring_buffer_mask(r, v + 1); >> + flag = *v; >> + >> + if (!check_branch(from, to)) { >> + has_failed = true; >> + printf("[Filter: %d] From: %lx To: %lx Flags: %lx\n", >> + branch_sample_type, from, to, flag); >> + } >> + } >> +} >> + >> +static void read_ring_buffer(struct event *e) >> +{ >> + struct ring_buffer *r = &e->ring_buffer; >> + struct perf_event_header *hdr; >> + int old, head; > Why not tail and head? Sure, will change it. >> + >> + head = r->page->data_head & r->mask; >> + >> + asm volatile ("sync": : :"memory"); >> + >> + old = r->page->data_tail & r->mask; >> + > Can you explain the logic of syncing between reading head and tail? Is > there an expectation that head is not likely to change? > > As a more general comment, what is sync trying to achieve? If you're > trying to synchronise something, what's the sync actually achieving? Is > there a corresponding memory barrier somewhere else? What race > conditions are you trying to guard against and does this actually guard > against them? Once we wake up from poll and try to read the ring buffer, head is not going to change. We keep increment the tail and process the record till we dont reach the head position. Once there we just write head into the data_tail to inform kernel that the processing is over and kernel may start filling up the ring buffer again. For more details , please look into this file include/uapi/linux/perf_event.h (perf_event_mmap_page). /* * Control data for the mmap() data buffer. * * User-space reading the @data_head value should issue an smp_rmb(), * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be * written by userspace to reflect the last read data, after issueing * an smp_mb() to separate the data read from the ->data_tail store. * In this case the kernel will not over-write unread data. 
> >> + while (old != head) { >> + hdr = (struct perf_event_header *)(r->ring_base + old); >> + >> + if ((old & r->mask) + hdr->size != >> + ((old + hdr->size) & r->mask)) >> + ++record_overlap; >> + >> + if (hdr->type == PERF_RECORD_SAMPLE) { >> + ++record_sample; >> + dump_sample(hdr, r); >> + } >> + >> + if (hdr->type == PERF_RECORD_MMAP) >> + ++record_mmap; >> + >> + if (hdr->type == PERF_RECORD_LOST) >> + ++record_lost; >> + >> + if (hdr->type == PERF_RECORD_THROTTLE) >> + ++record_throttle; >> + >> + if (hdr->type == PERF_RECORD_UNTHROTTLE) >> + ++record_unthrottle; >> + >> + old = (old + hdr->size) & r->mask; >> + } >> + r->page->data_tail = old; > What happens if data_tail moves while you're doing the loop? I believe that is not possible. Kernel wont change it unless we we write head position into that after processing entire data. > > > >> +static int filter_test(void) >> +{ >> + struct pollfd pollfd; >> + struct event ebhrb; >> + pid_t pid; >> + int ret, loop = 0; >> + >> + has_failed = false; >> + pid = fork(); >> + if (pid == -1) { >> + perror("fork() failed"); >> + return 1; >> + } >> + >> + /* Run child */ >> + if (pid == 0) { >> + start_loop(); >> + exit(0); >> + } >> + >> + /* Prepare event */ >> + event_init_opts(&ebhrb, PERF_COUNT_HW_INSTRUCTIONS, >> + PERF_TYPE_HARDWARE, "insturctions"); > Is instructions deliberately spelled incorrectly? :) No, its a mistake. Will fix it. 
>> + ebhrb.attr.sample_type = PERF_SAMPLE_BRANCH_STACK; >> + ebhrb.attr.disabled = 1; >> + ebhrb.attr.mmap = 1; >> + ebhrb.attr.mmap_data = 1; >> + ebhrb.attr.sample_period = SAMPLE_PERIOD; >> + ebhrb.attr.exclude_user = 0; >> + ebhrb.attr.exclude_kernel = 1; >> + ebhrb.attr.exclude_hv = 1; >> + ebhrb.attr.branch_sample_type = branch_sample_type; >> + >> + /* Open event */ >> + event_open_with_pid(&ebhrb, pid); >> + >> + /* Mmap ring buffer and enable event */ >> + event_mmap(&ebhrb); >> + FAIL_IF(event_enable(&ebhrb)); >> + >> + /* Prepare polling */ >> + pollfd.fd = ebhrb.fd; >> + pollfd.events = POLLIN; >> + >> + for (loop = 0; loop < LOOP_COUNT; loop++) { >> + ret = poll(&pollfd, 1, -1); >> + if (ret == -1) { >> + perror("poll() failed"); >> + return 1; >> + } >> + if (ret == 0) { >> + perror("poll() timeout"); >> + return 1; >> + } >> + read_ring_buffer(&ebhrb); >> + if (has_failed) >> + return 1; > 1) I don't see anything that sets has_failed after it's initalised. Its initialized to 'false' in the beginning of each test and would be set to 'true' inside dump_sample function if any of the captured branches does not match the applied filter. > 2) Should these error cases also explicitly terminate the child? Do you > need something like this perhaps? > > if (ret == 0) { > perror("poll() failed"); > goto err; > } > > ... > > } > > ... > > return 0; > > err: > kill(pid, SIGTERM); // maybe even sigkill in the error case? > return 1; Right, will change it. >> + } >> + >> + /* Disable and close event */ >> + FAIL_IF(event_disable(&ebhrb)); >> + event_close(&ebhrb); > Again, do these need to be replicated in the error path? Right, will change it. >> + >> + /* Terminate child */ >> + kill(pid, SIGKILL); > SIGKILL seems a bit harsh: wouldn't SIGTERM work? 
>> + return 0; >> +} >> + >> +static int bhrb_filters_test(void) >> +{ >> + int i; >> + >> + /* Fetch branches */ >> + fetch_branches(); >> + init_branch_stats(); >> + init_perf_mmap_stats(); >> + >> + for (i = 0; i < sizeof(branch_test_set)/sizeof(int); i++) { >> + branch_sample_type = branch_test_set[i]; >> + if (filter_test()) > Couldn't branch_sample_type be passed to filter_test as a parameter, > rather than as a global? Yeah it can be. Will change it. >> + return 1; >> + } >> + > > >> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h >> new file mode 100644 >> index 0000000..072375a >> --- /dev/null >> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h >> @@ -0,0 +1,16 @@ >> +/* >> + * Copyright 2015 Anshuman Khandual, IBM Corporation. >> + * >> + * This program is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU General Public License >> + * as published by the Free Software Foundation; either version >> + * 2 of the License, or (at your option) any later version. > License again. (And in the other files in this patch.) 
> > >> +_GLOBAL(start_loop) >> +label: >> + b label0 /* ANY */ >> + blr /* ANY_RETURN */ >> +label0: >> + b label1 /* ANY */ >> + >> +label1: >> + b label2 /* ANY */ >> + >> +label2: >> + b label3 /* ANY */ >> + >> +label3: >> + mflr LR_SAVE >> + bl label4 /* ANY | ANY_CALL */ >> + mtlr LR_SAVE >> + b start_loop /* ANY */ >> +label4: >> + mflr LR_SAVE >> + li 20, 12 >> + cmpi 3, 20, 12 >> + bcl 12, 4 * cr3+2, label5 /* ANY | ANY_CALL | COND */ >> + li 20, 12 >> + cmpi 4, 20, 20 >> + bcl 12, 4 * cr4+0, label5 /* ANY | ANY_CALL | COND */ >> + LOAD_ADDR(20, label5) >> + mtctr 20 >> + li 22, 10 >> + cmpi 0, 22, 10 >> + bcctrl 12, 4*cr0+2 /* ANY | NY_CALL | IND_CALL | COND */ >> + LOAD_ADDR(20, label5) >> + mtlr 20 >> + li 20, 10 >> + cmpi 0, 20, 10 >> + bclrl 12, 4*cr0+2 /* ANY | ANY_CALL | IND_CALL | COND */ >> + mtlr LR_SAVE >> + blr /* ANY | ANY_RETURN */ >> + >> +label5: >> + blr /* ANY | ANY_RETURN */ >> + > Could these labels have more descriptive names? Initially I had one label with same numeric numbering for each kind of branch instruction it was intended for but then it seemed to be overkill for that purpose. Compacted them to accommodate more branches per label and here we are. What kind of descriptive names will each of these label assume when each one accommodates multiple branches now ? Any thoughts ? Though I am willing to change them.
On Friday 12 June 2015 12:32 PM, Anshuman Khandual wrote: > On 06/11/2015 07:39 AM, Daniel Axtens wrote: >> Hi, >> >> On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote: >>> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c >>> new file mode 100644 >>> index 0000000..13e6b72 >>> --- /dev/null >>> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c >>> @@ -0,0 +1,513 @@ >>> +/* >>> + * BHRB filter test (HW & SW) >>> + * >>> + * Copyright 2015 Anshuman Khandual, IBM Corporation. >>> + * >>> + * This program is free software; you can redistribute it and/or >>> + * modify it under the terms of the GNU General Public License >>> + * as published by the Free Software Foundation; either version >>> + * 2 of the License, or (at your option) any later version. >>> + */ >> This should also be gpl2 only. > Why ? Any special reason ? I can see similar existing statements here > in this file as well "powerpcC/primitives/load_unaligned_zeropad.c" For the new files, mpe suggested to use gpl2 only version of the license. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation. 
Also, preferred format for Copyright line is to have "(C)" next to word Copyright http://git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/tree/COPYING#n9 Maddy >>> +#include <unistd.h> >>> +#include <stdio.h> >>> +#include <stdlib.h> >>> +#include <signal.h> >>> +#include <poll.h> >>> +#include <sys/shm.h> >>> +#include <sys/types.h> >>> +#include <sys/wait.h> >>> +#include <sys/mman.h> >>> + >>> +#include "bhrb_filters.h" >>> +#include "utils.h" >>> +#include "../event.h" >>> +#include "../lib.h" >>> + >>> +/* Fetched address counts */ >>> +#define ALL_MAX 32 >>> +#define CALL_MAX 12 >>> +#define RET_MAX 10 >>> +#define COND_MAX 8 >>> +#define IND_MAX 4 >>> + >>> +/* Test tunables */ >>> +#define LOOP_COUNT 10 >>> +#define SAMPLE_PERIOD 10000 >>> + >>> +static int branch_sample_type; >>> +static int branch_test_set[27] = { >> Do you need to explicitly provide the count here? > Not really, will fix it. > >>> + PERF_SAMPLE_BRANCH_ANY_CALL, /* Single filters */ >>> + PERF_SAMPLE_BRANCH_ANY_RETURN, >>> + PERF_SAMPLE_BRANCH_COND, >>> + PERF_SAMPLE_BRANCH_IND_CALL, >>> + PERF_SAMPLE_BRANCH_ANY, >>> + >>> + PERF_SAMPLE_BRANCH_ANY_CALL | /* Tripple filters */ >> s/Tripple/Triple/ > Sure.will fix it. > >>> + PERF_SAMPLE_BRANCH_ANY_RETURN | >>> + PERF_SAMPLE_BRANCH_COND, >>> + >> >>> + >>> +static void *ring_buffer_mask(struct ring_buffer *r, void *p) >> Is this actually returning a mask? It looks more like it's calculating >> an offset, and that seems to be how you use it below. > Yeah it does calculate an offset. Will change the function name to > ring_buffer_offset instead. > >>> +{ >>> + unsigned long l = (unsigned long)p; >>> + >>> + return (void *)(r->ring_base + ((l - r->ring_base) & r->mask)); >>> +} >> That's a lot of casts, especially when you then load it into a int64_t >> pointer below... > Will it cause any problem ? I can return here int64_t * instead to void * > to match the receiving pointer. 
> >>> + >>> +static void dump_sample(struct perf_event_header *hdr, struct ring_buffer *r) >>> +{ >>> + unsigned long from, to, flag; >>> + int i, nr; >>> + int64_t *v; >>> + >>> + /* NR Branches */ >>> + v = ring_buffer_mask(r, hdr + 1); >> ...here. (and everywhere else I can see that you're using the >> ring_buffer_mask function) >>> + >>> + nr = *v; >> You are dereferencing a int64_t pointer into an int. Should nr be an >> int64_t? Or should v be a different pointer type? > hmm, int64_t sounds good. > >>> + >>> + /* Branches */ >>> + for (i = 0; i < nr; i++) { >>> + v = ring_buffer_mask(r, v + 1); >>> + from = *v; >> Now you're dereferencing an *int64_t into an unsigned long. > Just wanted to have 64 bits for that field. To achieve some uniformity > will change all of v, nr, from, to, flags as int64_t type variables. > Also will make ring_buffer_mask function return in64_t * pointer type > instead. Will change ring_buffer_mask into ring_buffer_offset as well. > >>> + >>> + v = ring_buffer_mask(r, v + 1); >>> + to = *v; >>> + >>> + v = ring_buffer_mask(r, v + 1); >>> + flag = *v; >>> + >>> + if (!check_branch(from, to)) { >>> + has_failed = true; >>> + printf("[Filter: %d] From: %lx To: %lx Flags: %lx\n", >>> + branch_sample_type, from, to, flag); >>> + } >>> + } >>> +} >>> + >>> +static void read_ring_buffer(struct event *e) >>> +{ >>> + struct ring_buffer *r = &e->ring_buffer; >>> + struct perf_event_header *hdr; >>> + int old, head; >> Why not tail and head? > Sure, will change it. > >>> + >>> + head = r->page->data_head & r->mask; >>> + >>> + asm volatile ("sync": : :"memory"); >>> + >>> + old = r->page->data_tail & r->mask; >>> + >> Can you explain the logic of syncing between reading head and tail? Is >> there an expectation that head is not likely to change? >> >> As a more general comment, what is sync trying to achieve? If you're >> trying to synchronise something, what's the sync actually achieving? 
Is >> there a corresponding memory barrier somewhere else? What race >> conditions are you trying to guard against and does this actually guard >> against them? > Once we wake up from poll and try to read the ring buffer, head is not > going to change. We keep increment the tail and process the record till > we dont reach the head position. Once there we just write head into the > data_tail to inform kernel that the processing is over and kernel may > start filling up the ring buffer again. For more details , please look > into this file include/uapi/linux/perf_event.h (perf_event_mmap_page). > > /* > * Control data for the mmap() data buffer. > * > * User-space reading the @data_head value should issue an smp_rmb(), > * after reading this value. > * > * When the mapping is PROT_WRITE the @data_tail value should be > * written by userspace to reflect the last read data, after issueing > * an smp_mb() to separate the data read from the ->data_tail store. > * In this case the kernel will not over-write unread data. > >>> + while (old != head) { >>> + hdr = (struct perf_event_header *)(r->ring_base + old); >>> + >>> + if ((old & r->mask) + hdr->size != >>> + ((old + hdr->size) & r->mask)) >>> + ++record_overlap; >>> + >>> + if (hdr->type == PERF_RECORD_SAMPLE) { >>> + ++record_sample; >>> + dump_sample(hdr, r); >>> + } >>> + >>> + if (hdr->type == PERF_RECORD_MMAP) >>> + ++record_mmap; >>> + >>> + if (hdr->type == PERF_RECORD_LOST) >>> + ++record_lost; >>> + >>> + if (hdr->type == PERF_RECORD_THROTTLE) >>> + ++record_throttle; >>> + >>> + if (hdr->type == PERF_RECORD_UNTHROTTLE) >>> + ++record_unthrottle; >>> + >>> + old = (old + hdr->size) & r->mask; >>> + } >>> + r->page->data_tail = old; >> What happens if data_tail moves while you're doing the loop? > I believe that is not possible. Kernel wont change it unless we > we write head position into that after processing entire data. 
> >> >> >>> +static int filter_test(void) >>> +{ >>> + struct pollfd pollfd; >>> + struct event ebhrb; >>> + pid_t pid; >>> + int ret, loop = 0; >>> + >>> + has_failed = false; >>> + pid = fork(); >>> + if (pid == -1) { >>> + perror("fork() failed"); >>> + return 1; >>> + } >>> + >>> + /* Run child */ >>> + if (pid == 0) { >>> + start_loop(); >>> + exit(0); >>> + } >>> + >>> + /* Prepare event */ >>> + event_init_opts(&ebhrb, PERF_COUNT_HW_INSTRUCTIONS, >>> + PERF_TYPE_HARDWARE, "insturctions"); >> Is instructions deliberately spelled incorrectly? > :) No, its a mistake. Will fix it. > >>> + ebhrb.attr.sample_type = PERF_SAMPLE_BRANCH_STACK; >>> + ebhrb.attr.disabled = 1; >>> + ebhrb.attr.mmap = 1; >>> + ebhrb.attr.mmap_data = 1; >>> + ebhrb.attr.sample_period = SAMPLE_PERIOD; >>> + ebhrb.attr.exclude_user = 0; >>> + ebhrb.attr.exclude_kernel = 1; >>> + ebhrb.attr.exclude_hv = 1; >>> + ebhrb.attr.branch_sample_type = branch_sample_type; >>> + >>> + /* Open event */ >>> + event_open_with_pid(&ebhrb, pid); >>> + >>> + /* Mmap ring buffer and enable event */ >>> + event_mmap(&ebhrb); >>> + FAIL_IF(event_enable(&ebhrb)); >>> + >>> + /* Prepare polling */ >>> + pollfd.fd = ebhrb.fd; >>> + pollfd.events = POLLIN; >>> + >>> + for (loop = 0; loop < LOOP_COUNT; loop++) { >>> + ret = poll(&pollfd, 1, -1); >>> + if (ret == -1) { >>> + perror("poll() failed"); >>> + return 1; >>> + } >>> + if (ret == 0) { >>> + perror("poll() timeout"); >>> + return 1; >>> + } >>> + read_ring_buffer(&ebhrb); >>> + if (has_failed) >>> + return 1; >> 1) I don't see anything that sets has_failed after it's initalised. > Its initialized to 'false' in the beginning of each test and would be > set to 'true' inside dump_sample function if any of the captured branches > does not match the applied filter. > >> 2) Should these error cases also explicitly terminate the child? Do you >> need something like this perhaps? >> >> if (ret == 0) { >> perror("poll() failed"); >> goto err; >> } >> >> ... 
>> >> } >> >> ... >> >> return 0; >> >> err: >> kill(pid, SIGTERM); // maybe even sigkill in the error case? >> return 1; > Right, will change it. > >>> + } >>> + >>> + /* Disable and close event */ >>> + FAIL_IF(event_disable(&ebhrb)); >>> + event_close(&ebhrb); >> Again, do these need to be replicated in the error path? > Right, will change it. > >>> + >>> + /* Terminate child */ >>> + kill(pid, SIGKILL); >> SIGKILL seems a bit harsh: wouldn't SIGTERM work? >>> + return 0; >>> +} >>> + >>> +static int bhrb_filters_test(void) >>> +{ >>> + int i; >>> + >>> + /* Fetch branches */ >>> + fetch_branches(); >>> + init_branch_stats(); >>> + init_perf_mmap_stats(); >>> + >>> + for (i = 0; i < sizeof(branch_test_set)/sizeof(int); i++) { >>> + branch_sample_type = branch_test_set[i]; >>> + if (filter_test()) >> Couldn't branch_sample_type be passed to filter_test as a parameter, >> rather than as a global? > Yeah it can be. Will change it. > >>> + return 1; >>> + } >>> + >> >>> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h >>> new file mode 100644 >>> index 0000000..072375a >>> --- /dev/null >>> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h >>> @@ -0,0 +1,16 @@ >>> +/* >>> + * Copyright 2015 Anshuman Khandual, IBM Corporation. >>> + * >>> + * This program is free software; you can redistribute it and/or >>> + * modify it under the terms of the GNU General Public License >>> + * as published by the Free Software Foundation; either version >>> + * 2 of the License, or (at your option) any later version. >> License again. (And in the other files in this patch.) 
>> >> >>> +_GLOBAL(start_loop) >>> +label: >>> + b label0 /* ANY */ >>> + blr /* ANY_RETURN */ >>> +label0: >>> + b label1 /* ANY */ >>> + >>> +label1: >>> + b label2 /* ANY */ >>> + >>> +label2: >>> + b label3 /* ANY */ >>> + >>> +label3: >>> + mflr LR_SAVE >>> + bl label4 /* ANY | ANY_CALL */ >>> + mtlr LR_SAVE >>> + b start_loop /* ANY */ >>> +label4: >>> + mflr LR_SAVE >>> + li 20, 12 >>> + cmpi 3, 20, 12 >>> + bcl 12, 4 * cr3+2, label5 /* ANY | ANY_CALL | COND */ >>> + li 20, 12 >>> + cmpi 4, 20, 20 >>> + bcl 12, 4 * cr4+0, label5 /* ANY | ANY_CALL | COND */ >>> + LOAD_ADDR(20, label5) >>> + mtctr 20 >>> + li 22, 10 >>> + cmpi 0, 22, 10 >>> + bcctrl 12, 4*cr0+2 /* ANY | NY_CALL | IND_CALL | COND */ >>> + LOAD_ADDR(20, label5) >>> + mtlr 20 >>> + li 20, 10 >>> + cmpi 0, 20, 10 >>> + bclrl 12, 4*cr0+2 /* ANY | ANY_CALL | IND_CALL | COND */ >>> + mtlr LR_SAVE >>> + blr /* ANY | ANY_RETURN */ >>> + >>> +label5: >>> + blr /* ANY | ANY_RETURN */ >>> + >> Could these labels have more descriptive names? > Initially I had one label with same numeric numbering for each kind of > branch instruction it was intended for but then it seemed to be overkill > for that purpose. Compacted them to accommodate more branches per label > and here we are. What kind of descriptive names will each of these > label assume when each one accommodates multiple branches now ? Any > thoughts ? Though I am willing to change them. > > _______________________________________________ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev
On 06/12/2015 12:56 PM, Madhavan Srinivasan wrote: > > On Friday 12 June 2015 12:32 PM, Anshuman Khandual wrote: >> > On 06/11/2015 07:39 AM, Daniel Axtens wrote: >>> >> Hi, >>> >> >>> >> On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote: >>>> >>> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c >>>> >>> new file mode 100644 >>>> >>> index 0000000..13e6b72 >>>> >>> --- /dev/null >>>> >>> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c >>>> >>> @@ -0,0 +1,513 @@ >>>> >>> +/* >>>> >>> + * BHRB filter test (HW & SW) >>>> >>> + * >>>> >>> + * Copyright 2015 Anshuman Khandual, IBM Corporation. >>>> >>> + * >>>> >>> + * This program is free software; you can redistribute it and/or >>>> >>> + * modify it under the terms of the GNU General Public License >>>> >>> + * as published by the Free Software Foundation; either version >>>> >>> + * 2 of the License, or (at your option) any later version. >>>> >>> + */ >>> >> This should also be gpl2 only. >> > Why ? Any special reason ? I can see similar existing statements here >> > in this file as well "powerpc/primitives/load_unaligned_zeropad.c" > For the new files, mpe suggested to use gpl2 only version of the license. > > This program is free software; you can redistribute it and/or modify it > under the terms of the GNU General Public License version 2 as published > by the Free Software Foundation. > > Also, preferred format for Copyright line is to have "(C)" next to word Copyright Sure, will accommodate both the proposed changes next time around.
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index a9099d9..2e103fd 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -4,7 +4,7 @@ noarg: TEST_PROGS := count_instructions l3_bank_test per_event_excludes EXTRA_SOURCES := ../harness.c event.c lib.c -all: $(TEST_PROGS) ebb +all: $(TEST_PROGS) ebb bhrb $(TEST_PROGS): $(EXTRA_SOURCES) @@ -18,25 +18,32 @@ DEFAULT_RUN_TESTS := $(RUN_TESTS) override define RUN_TESTS $(DEFAULT_RUN_TESTS) $(MAKE) -C ebb run_tests + $(MAKE) -C bhrb run_tests endef DEFAULT_EMIT_TESTS := $(EMIT_TESTS) override define EMIT_TESTS $(DEFAULT_EMIT_TESTS) $(MAKE) -s -C ebb emit_tests + $(MAKE) -s -C bhrb emit_tests endef DEFAULT_INSTALL_RULE := $(INSTALL_RULE) override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) $(MAKE) -C ebb install + $(MAKE) -C bhrb install endef clean: rm -f $(TEST_PROGS) loop.o $(MAKE) -C ebb clean + $(MAKE) -C bhrb clean ebb: $(MAKE) -k -C $@ all -.PHONY: all run_tests clean ebb +bhrb: + $(MAKE) -k -C $@ all + +.PHONY: all run_tests clean ebb bhrb diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/Makefile b/tools/testing/selftests/powerpc/pmu/bhrb/Makefile new file mode 100644 index 0000000..61c032a --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/bhrb/Makefile @@ -0,0 +1,13 @@ +noarg: + $(MAKE) -C ../../ + +TEST_PROGS := bhrb_filters + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../../harness.c ../event.c ../lib.c bhrb_filters_asm.S + +include ../../../lib.mk + +clean: + rm -f $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c new file mode 100644 index 0000000..13e6b72 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c @@ -0,0 +1,513 @@ +/* + * BHRB filter test (HW & SW) + * + * Copyright 2015 Anshuman Khandual, IBM Corporation. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <poll.h> +#include <sys/shm.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/mman.h> + +#include "bhrb_filters.h" +#include "utils.h" +#include "../event.h" +#include "../lib.h" + +/* Fetched address counts */ +#define ALL_MAX 32 +#define CALL_MAX 12 +#define RET_MAX 10 +#define COND_MAX 8 +#define IND_MAX 4 + +/* Test tunables */ +#define LOOP_COUNT 10 +#define SAMPLE_PERIOD 10000 + +static int branch_sample_type; +static int branch_test_set[27] = { + PERF_SAMPLE_BRANCH_ANY_CALL, /* Single filters */ + PERF_SAMPLE_BRANCH_ANY_RETURN, + PERF_SAMPLE_BRANCH_COND, + PERF_SAMPLE_BRANCH_IND_CALL, + PERF_SAMPLE_BRANCH_ANY, + + PERF_SAMPLE_BRANCH_ANY_CALL | /* Double filters */ + PERF_SAMPLE_BRANCH_ANY_RETURN, + PERF_SAMPLE_BRANCH_ANY_CALL | + PERF_SAMPLE_BRANCH_COND, + PERF_SAMPLE_BRANCH_ANY_CALL | + PERF_SAMPLE_BRANCH_IND_CALL, + PERF_SAMPLE_BRANCH_ANY_CALL | + PERF_SAMPLE_BRANCH_ANY, + + PERF_SAMPLE_BRANCH_ANY_RETURN | + PERF_SAMPLE_BRANCH_COND, + PERF_SAMPLE_BRANCH_ANY_RETURN | + PERF_SAMPLE_BRANCH_IND_CALL, + PERF_SAMPLE_BRANCH_ANY_RETURN | + PERF_SAMPLE_BRANCH_ANY, + + PERF_SAMPLE_BRANCH_COND | + PERF_SAMPLE_BRANCH_IND_CALL, + PERF_SAMPLE_BRANCH_COND | + PERF_SAMPLE_BRANCH_ANY, + + PERF_SAMPLE_BRANCH_IND_CALL | + PERF_SAMPLE_BRANCH_ANY, + + PERF_SAMPLE_BRANCH_ANY_CALL | /* Tripple filters */ + PERF_SAMPLE_BRANCH_ANY_RETURN | + PERF_SAMPLE_BRANCH_COND, + + PERF_SAMPLE_BRANCH_ANY_CALL | + PERF_SAMPLE_BRANCH_ANY_RETURN | + PERF_SAMPLE_BRANCH_IND_CALL, + + PERF_SAMPLE_BRANCH_ANY_CALL | + PERF_SAMPLE_BRANCH_ANY_RETURN | + PERF_SAMPLE_BRANCH_ANY, + + PERF_SAMPLE_BRANCH_ANY_RETURN | + 
PERF_SAMPLE_BRANCH_COND | + PERF_SAMPLE_BRANCH_IND_CALL, + + PERF_SAMPLE_BRANCH_ANY_RETURN | + PERF_SAMPLE_BRANCH_COND | + PERF_SAMPLE_BRANCH_ANY, + + PERF_SAMPLE_BRANCH_COND | + PERF_SAMPLE_BRANCH_IND_CALL | + PERF_SAMPLE_BRANCH_ANY, + + PERF_SAMPLE_BRANCH_ANY_CALL | /* Quadruple filters */ + PERF_SAMPLE_BRANCH_ANY_RETURN | + PERF_SAMPLE_BRANCH_COND | + PERF_SAMPLE_BRANCH_IND_CALL, + + PERF_SAMPLE_BRANCH_ANY_CALL | + PERF_SAMPLE_BRANCH_ANY_RETURN | + PERF_SAMPLE_BRANCH_COND | + PERF_SAMPLE_BRANCH_ANY, + + PERF_SAMPLE_BRANCH_ANY_CALL | + PERF_SAMPLE_BRANCH_ANY_RETURN | + PERF_SAMPLE_BRANCH_IND_CALL | + PERF_SAMPLE_BRANCH_ANY, + + PERF_SAMPLE_BRANCH_ANY_CALL | + PERF_SAMPLE_BRANCH_COND | + PERF_SAMPLE_BRANCH_IND_CALL | + PERF_SAMPLE_BRANCH_ANY, + + PERF_SAMPLE_BRANCH_ANY_RETURN | + PERF_SAMPLE_BRANCH_COND | + PERF_SAMPLE_BRANCH_IND_CALL | + PERF_SAMPLE_BRANCH_ANY, + + PERF_SAMPLE_BRANCH_ANY_CALL | /* All filters */ + PERF_SAMPLE_BRANCH_ANY_RETURN | + PERF_SAMPLE_BRANCH_COND | + PERF_SAMPLE_BRANCH_IND_CALL | + PERF_SAMPLE_BRANCH_ANY }; + +static unsigned int all_set[ALL_MAX], call_set[CALL_MAX]; +static unsigned int ret_set[RET_MAX], cond_set[COND_MAX], ind_set[IND_MAX]; + +static bool has_failed; +static unsigned long branch_any_call; +static unsigned long branch_any_return; +static unsigned long branch_cond; +static unsigned long branch_ind_call; +static unsigned long branch_any; +static unsigned long branch_total; + +static void init_branch_stats(void) +{ + branch_any_call = 0; + branch_any_return = 0; + branch_cond = 0; + branch_ind_call = 0; + branch_any = 0; + branch_total = 0; +} + +static void show_branch_stats(void) +{ + printf("BRANCH STATS\n"); + printf("ANY_CALL: %ld\n", branch_any_call); + printf("ANY_RETURN: %ld\n", branch_any_return); + printf("COND: %ld\n", branch_cond); + printf("IND_CALL: %ld\n", branch_ind_call); + printf("ANY: %ld\n", branch_any); + printf("TOTAL: %ld\n", branch_total); + +} + +static void fetch_branches(void) +{ + int i; + + /* 
Clear */ + memset(all_set, 0, sizeof(all_set)); + memset(call_set, 0, sizeof(call_set)); + memset(ret_set, 0, sizeof(ret_set)); + memset(cond_set, 0, sizeof(cond_set)); + memset(ind_set, 0, sizeof(ind_set)); + + /* Fetch */ + fetch_all_branches(all_set); + fetch_all_calls(call_set); + fetch_all_rets(ret_set); + fetch_all_conds(cond_set); + fetch_all_inds(ind_set); + + /* Display */ + printf("ANY branches\n"); + for (i = 0; i < ALL_MAX; i += 2) + printf("%x ---> %x\n", all_set[i], all_set[i + 1]); + + printf("ANY_CALL branches\n"); + for (i = 0; i < CALL_MAX; i += 2) + printf("%x ---> %x\n", call_set[i], call_set[i + 1]); + + printf("ANY_RETURN branches\n"); + for (i = 0; i < RET_MAX; i += 2) + printf("%x ---> %x\n", ret_set[i], ret_set[i + 1]); + + printf("COND branches\n"); + for (i = 0; i < COND_MAX; i += 2) + printf("%x ---> %x\n", cond_set[i], cond_set[i + 1]); + + printf("IND_CALL branches\n"); + for (i = 0; i < IND_MAX; i += 2) + printf("%x ---> %x\n", ind_set[i], ind_set[i + 1]); + +} + +/* Perf mmap stats */ +static unsigned long record_sample; +static unsigned long record_mmap; +static unsigned long record_lost; +static unsigned long record_throttle; +static unsigned long record_unthrottle; +static unsigned long record_overlap; + +static void init_perf_mmap_stats(void) +{ + record_sample = 0; + record_mmap = 0; + record_lost = 0; + record_throttle = 0; + record_unthrottle = 0; + record_overlap = 0; +} + +static void show_perf_mmap_stats(void) +{ + printf("PERF STATS\n"); + printf("OVERLAP: %ld\n", record_overlap); + printf("RECORD_SAMPLE: %ld\n", record_sample); + printf("RECORD_MAP: %ld\n", record_mmap); + printf("RECORD_LOST: %ld\n", record_lost); + printf("RECORD_THROTTLE: %ld\n", record_throttle); + printf("RECORD_UNTHROTTLE: %ld\n", record_unthrottle); +} + +static bool search_all_set(unsigned long from, unsigned long to) +{ + int i; + + for (i = 0; i < ALL_MAX; i += 2) { + if ((all_set[i] == from) && (all_set[i+1] == to)) + return true; + } + return 
false; +} + +static bool search_call_set(unsigned long from, unsigned long to) +{ + int i; + + for (i = 0; i < CALL_MAX; i += 2) { + if ((call_set[i] == from) && (call_set[i+1] == to)) + return true; + } + return false; +} + +static bool search_ret_set(unsigned long from, unsigned long to) +{ + int i; + + for (i = 0; i < RET_MAX; i += 2) { + if ((ret_set[i] == from) && (ret_set[i+1] == to)) + return true; + } + return false; +} + +static bool search_cond_set(unsigned long from, unsigned long to) +{ + int i; + + for (i = 0; i < COND_MAX; i += 2) { + if ((cond_set[i] == from) && (cond_set[i+1] == to)) + return true; + } + return false; +} + +static bool search_ind_set(unsigned long from, unsigned long to) +{ + int i; + + for (i = 0; i < IND_MAX; i += 2) { + if ((ind_set[i] == from) && (ind_set[i+1] == to)) + return true; + } + return false; +} + +static int check_branch(unsigned long from, unsigned long to) +{ + bool result = false; + + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) { + if (search_call_set(from, to)) { + branch_any_call++; + result = true; + } + } + + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN) { + if (search_ret_set(from, to)) { + branch_any_return++; + result = true; + } + } + + if (branch_sample_type & PERF_SAMPLE_BRANCH_COND) { + if (search_cond_set(from, to)) { + branch_cond++; + result = true; + } + } + + if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) { + if (search_ind_set(from, to)) { + branch_ind_call++; + result = true; + } + } + + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) { + if (search_all_set(from, to)) { + branch_any++; + result = true; + } + } + + branch_total++; + return result; +} + +static void *ring_buffer_mask(struct ring_buffer *r, void *p) +{ + unsigned long l = (unsigned long)p; + + return (void *)(r->ring_base + ((l - r->ring_base) & r->mask)); +} + +static void dump_sample(struct perf_event_header *hdr, struct ring_buffer *r) +{ + unsigned long from, to, flag; + int i, nr; + int64_t *v; + + /* 
NR Branches */ + v = ring_buffer_mask(r, hdr + 1); + + nr = *v; + + /* Branches */ + for (i = 0; i < nr; i++) { + v = ring_buffer_mask(r, v + 1); + from = *v; + + v = ring_buffer_mask(r, v + 1); + to = *v; + + v = ring_buffer_mask(r, v + 1); + flag = *v; + + if (!check_branch(from, to)) { + has_failed = true; + printf("[Filter: %d] From: %lx To: %lx Flags: %lx\n", + branch_sample_type, from, to, flag); + } + } +} + +static void read_ring_buffer(struct event *e) +{ + struct ring_buffer *r = &e->ring_buffer; + struct perf_event_header *hdr; + int old, head; + + head = r->page->data_head & r->mask; + + asm volatile ("sync": : :"memory"); + + old = r->page->data_tail & r->mask; + + while (old != head) { + hdr = (struct perf_event_header *)(r->ring_base + old); + + if ((old & r->mask) + hdr->size != + ((old + hdr->size) & r->mask)) + ++record_overlap; + + if (hdr->type == PERF_RECORD_SAMPLE) { + ++record_sample; + dump_sample(hdr, r); + } + + if (hdr->type == PERF_RECORD_MMAP) + ++record_mmap; + + if (hdr->type == PERF_RECORD_LOST) + ++record_lost; + + if (hdr->type == PERF_RECORD_THROTTLE) + ++record_throttle; + + if (hdr->type == PERF_RECORD_UNTHROTTLE) + ++record_unthrottle; + + old = (old + hdr->size) & r->mask; + } + r->page->data_tail = old; +} + +static void event_mmap(struct event *e) +{ + struct ring_buffer *r = &e->ring_buffer; + + r->page = mmap(NULL, 9 * getpagesize(), PROT_READ | + PROT_WRITE, MAP_SHARED, e->fd, 0); + + if (r->page == MAP_FAILED) { + r->page = NULL; + perror("mmap() failed"); + } + + r->mask = (8 * getpagesize()) - 1; + r->ring_base = (unsigned long)r->page + getpagesize(); + +} + +static int filter_test(void) +{ + struct pollfd pollfd; + struct event ebhrb; + pid_t pid; + int ret, loop = 0; + + has_failed = false; + pid = fork(); + if (pid == -1) { + perror("fork() failed"); + return 1; + } + + /* Run child */ + if (pid == 0) { + start_loop(); + exit(0); + } + + /* Prepare event */ + event_init_opts(&ebhrb, PERF_COUNT_HW_INSTRUCTIONS, + 
PERF_TYPE_HARDWARE, "insturctions"); + ebhrb.attr.sample_type = PERF_SAMPLE_BRANCH_STACK; + ebhrb.attr.disabled = 1; + ebhrb.attr.mmap = 1; + ebhrb.attr.mmap_data = 1; + ebhrb.attr.sample_period = SAMPLE_PERIOD; + ebhrb.attr.exclude_user = 0; + ebhrb.attr.exclude_kernel = 1; + ebhrb.attr.exclude_hv = 1; + ebhrb.attr.branch_sample_type = branch_sample_type; + + /* Open event */ + event_open_with_pid(&ebhrb, pid); + + /* Mmap ring buffer and enable event */ + event_mmap(&ebhrb); + FAIL_IF(event_enable(&ebhrb)); + + /* Prepare polling */ + pollfd.fd = ebhrb.fd; + pollfd.events = POLLIN; + + for (loop = 0; loop < LOOP_COUNT; loop++) { + ret = poll(&pollfd, 1, -1); + if (ret == -1) { + perror("poll() failed"); + return 1; + } + if (ret == 0) { + perror("poll() timeout"); + return 1; + } + read_ring_buffer(&ebhrb); + if (has_failed) + return 1; + } + + /* Disable and close event */ + FAIL_IF(event_disable(&ebhrb)); + event_close(&ebhrb); + + /* Terminate child */ + kill(pid, SIGKILL); + return 0; +} + +static int bhrb_filters_test(void) +{ + int i; + + /* Fetch branches */ + fetch_branches(); + init_branch_stats(); + init_perf_mmap_stats(); + + for (i = 0; i < sizeof(branch_test_set)/sizeof(int); i++) { + branch_sample_type = branch_test_set[i]; + if (filter_test()) + return 1; + } + + /* Show stats */ + show_branch_stats(); + show_perf_mmap_stats(); + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(bhrb_filters_test, "bhrb_filters"); +} diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h new file mode 100644 index 0000000..072375a --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h @@ -0,0 +1,16 @@ +/* + * Copyright 2015 Anshuman Khandual, IBM Corporation. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* Assembly routines */ +extern void fetch_all_branches(unsigned int *); +extern void fetch_all_calls(unsigned int *); +extern void fetch_all_rets(unsigned int *); +extern void fetch_all_conds(unsigned int *); +extern void fetch_all_inds(unsigned int *); +extern void start_loop(void); diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S new file mode 100644 index 0000000..e72a585 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters_asm.S @@ -0,0 +1,260 @@ +/* + * Assembly functions for BHRB test + * + * Copyright 2015 Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#define GLUE(a,b) a##b +#define _GLOBAL(name) \ + .section ".text"; \ + .align 2 ; \ + .globl name; \ + .globl GLUE(.,name); \ + .section ".opd","aw"; \ +name: \ + .quad GLUE(.,name); \ + .quad .TOC.@tocbase; \ + .quad 0; \ + .previous; \ + .type GLUE(.,name),@function; \ +GLUE(.,name): + +#define LR_SAVE 19 + +#define LOAD_ADDR(reg, label) \ + lis reg,(label)@highest; \ + ori reg,reg,(label)@higher; \ + rldicr reg,reg,32,31; \ + oris reg,reg,(label)@h; \ + ori reg,reg,(label)@l; \ + +#define LOAD_LABEL(reg1, label1, reg2, label2) \ + lis reg1,(label1)@highest; \ + ori reg1,reg1,(label1)@higher; \ + rldicr reg1,reg1,32,31; \ + oris reg1,reg1,(label1)@h; \ + ori reg1,reg1,(label1)@l; \ + lis reg2,(label2)@highest; \ + ori reg2,reg2,(label2)@higher; \ + rldicr reg2,reg2,32,31; \ + oris reg2,reg2,(label2)@h; \ + ori reg2,reg2,(label2)@l; \ + +_GLOBAL(start_loop) +label: + b label0 /* ANY */ + blr /* ANY_RETURN */ +label0: + b label1 /* ANY */ + +label1: + b label2 /* ANY */ + +label2: + b label3 /* ANY */ + +label3: + mflr LR_SAVE + bl label4 /* ANY | ANY_CALL */ + mtlr LR_SAVE + b start_loop /* ANY */ +label4: + mflr LR_SAVE + li 20, 12 + cmpi 3, 20, 12 + bcl 12, 4 * cr3+2, label5 /* ANY | ANY_CALL | COND */ + li 20, 12 + cmpi 4, 20, 20 + bcl 12, 4 * cr4+0, label5 /* ANY | ANY_CALL | COND */ + LOAD_ADDR(20, label5) + mtctr 20 + li 22, 10 + cmpi 0, 22, 10 + bcctrl 12, 4*cr0+2 /* ANY | NY_CALL | IND_CALL | COND */ + LOAD_ADDR(20, label5) + mtlr 20 + li 20, 10 + cmpi 0, 20, 10 + bclrl 12, 4*cr0+2 /* ANY | ANY_CALL | IND_CALL | COND */ + mtlr LR_SAVE + blr /* ANY | ANY_RETURN */ + +label5: + blr /* ANY | ANY_RETURN */ + +_GLOBAL(fetch_all_branches) + LOAD_LABEL(20, label, 21, label0) + st 20, 0(3) + st 21, 4(3) + + LOAD_LABEL(20, label0, 21, label1) + st 20, 8(3) + st 21, 12(3) + + LOAD_LABEL(20, label1, 21, label2) + st 20, 16(3) + st 21, 20(3) + + LOAD_LABEL(20, label2, 21, label3) + st 20, 24(3) + st 21, 28(3) + + LOAD_LABEL(20, label3, 21, label4) + addi 20, 
20, 4 + st 20, 32(3) + st 21, 36(3) + + LOAD_LABEL(20, label3, 21, label) + addi 20, 20, 12 + st 20, 40(3) + st 21, 44(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 12 + st 20, 48(3) + st 21, 52(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 24 + st 20, 56(3) + st 21, 60(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 60 + st 20, 64(3) + st 21, 68(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 96 + st 20, 72(3) + st 21, 76(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 104 + st 20, 80(3) + st 21, 84(3) + + LOAD_LABEL(20, label5, 21, label4) + st 20, 88(3) + addi 21, 21, 16 + st 21, 92(3) + + LOAD_LABEL(20, label5, 21, label4) + st 20, 96(3) + addi 21, 21, 28 + st 21, 100(3) + + LOAD_LABEL(20, label5, 21, label4) + st 20, 104(3) + addi 21, 21, 64 + st 21, 108(3) + + LOAD_LABEL(20, label5, 21, label4) + st 20, 112(3) + addi 21, 21, 100 + st 21, 116(3) + + LOAD_LABEL(20, label4, 21, label3) + addi 20, 20, 104 + st 20, 120(3) + addi 21, 21, 8 + st 21, 124(3) + blr + +_GLOBAL(fetch_all_calls) + LOAD_LABEL(20, label3, 21, label4) + addi 20, 20, 4 + st 20, 0(3) + st 21, 4(3) + + LOAD_LABEL(20, label3, 21, label4) + addi 20, 20, 4 + st 20, 8(3) + st 21, 12(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 12 + st 20, 16(3) + st 21, 20(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 24 + st 20, 24(3) + st 21, 28(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 60 + st 20, 32(3) + st 21, 36(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 96 + st 20, 40(3) + st 21, 44(3) + blr + +_GLOBAL(fetch_all_rets) + LOAD_LABEL(20, label5, 21, label4) + st 20, 0(3) + addi 21, 21, 16 + st 21, 4(3) + + LOAD_LABEL(20, label5, 21, label4) + st 20, 8(3) + addi 21, 21, 28 + st 21, 12(3) + + LOAD_LABEL(20, label5, 21, label4) + st 20, 16(3) + addi 21, 21, 64 + st 21, 20(3) + + LOAD_LABEL(20, label5, 21, label4) + st 20, 24(3) + addi 21, 21, 100 + st 21, 28(3) + + LOAD_LABEL(20, label4, 21, label3) + addi 20, 20, 
104 + st 20, 32(3) + addi 21, 21, 8 + st 21, 36(3) + blr + +_GLOBAL(fetch_all_conds) + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 12 + st 20, 0(3) + st 21, 4(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 24 + st 20, 8(3) + st 21, 12(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 60 + st 20, 16(3) + st 21, 20(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 96 + st 20, 24(3) + st 21, 28(3) + blr + +_GLOBAL(fetch_all_inds) + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 60 + st 20, 0(3) + st 21, 4(3) + + LOAD_LABEL(20, label4, 21, label5) + addi 20, 20, 96 + st 20, 8(3) + st 21, 12(3) + blr diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h index a0ea6b1..385b1c4 100644 --- a/tools/testing/selftests/powerpc/pmu/event.h +++ b/tools/testing/selftests/powerpc/pmu/event.h @@ -11,6 +11,10 @@ #include "utils.h" +struct ring_buffer { + struct perf_event_mmap_page *page; + unsigned long ring_base, old, mask; +}; struct event { struct perf_event_attr attr; @@ -22,6 +26,7 @@ struct event { u64 running; u64 enabled; } result; + struct ring_buffer ring_buffer; }; void event_init(struct event *e, u64 config);