Message ID | 63d80c2247003fa0116dbcd0188153d79793616f.1568219400.git.isaku.yamahata@gmail.com |
---|---|
State | New |
Headers | show |
Series | Library OS support | expand |
I believe this segment + time0 * 100.0/time1, + time1 * 100.0/time1, + time2 * 100.0/time1, + time3 * 100.0/time1, + time4 * 100.0/time1, + time5 * 100.0/time1); needs to change to: + time0 * 100.0/time0, + time1 * 100.0/time1, + time2 * 100.0/time2, + time3 * 100.0/time3, + time4 * 100.0/time4, + time5 * 100.0/time5); Also, I would recommend removing or delaying all printf statements that occur before your timing measurements. My experience has been that, when doing delicate timing experiments, printf can trigger async I/O activity, which adds significant noise to the measurements. There may be other opportunities to refine the measurements, including running each measurement experiment several times and then reporting the min, max, median, and mean. - patrick mcgehearty On 9/11/2019 4:04 PM, Isaku Yamahata wrote: > This is simple benchmark to measure function/nop effects. > OS noise is bigger even if it's pinned down to cpu and to make it rt > process. > > $ sudo chrt -r 99 taskset 1 ./a.out > > Signed-off-by: Isaku Yamahata <isaku.yamahata@gmail.com> > --- > benchtests/bench-nop.c | 128 +++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 128 insertions(+) > create mode 100644 benchtests/bench-nop.c > > diff --git a/benchtests/bench-nop.c b/benchtests/bench-nop.c > new file mode 100644 > index 0000000000..bb98b3d371 > --- /dev/null > +++ b/benchtests/bench-nop.c > @@ -0,0 +1,128 @@ > +#include <stdio.h> > +#include <unistd.h> > +#include <sys/syscall.h> > + > +static inline unsigned long long rdtscp(void) > +{ > + unsigned int aux; > + unsigned long long now = __builtin_ia32_rdtscp (&aux); > + return now; > +} > + > +//#define LOOP 10000000 > +#define LOOP 1000000 > + > +void func0(void) > +{ > + for (int i = 0; i < LOOP; i++) > + { > + unsigned long ret = syscall(SYS_gettid); > + } > +} > + > +void func1(void) > +{ > + for (int i = 0; i < LOOP; i++) > + { > + unsigned long ret; > + __asm__ volatile( > + "syscall\n" > + : "=a"(ret) > + : "0"(SYS_gettid)); > 
+ } > +} > + > +void func2(void) > +{ > + for (int i = 0; i < LOOP; i++) > + { > + unsigned long ret; > + __asm__ volatile( > + "syscall\n" > + "nop;nop;nop\n" > + : "=a"(ret) > + : "0"(SYS_gettid)); > + } > +} > + > +void func3(void) > +{ > + for (int i = 0; i < LOOP; i++) > + { > + unsigned long ret; > + __asm__ volatile( > + "syscall\n" > + "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop\n" > + : "=a"(ret) > + : "0"(SYS_gettid)); > + } > +} > + > +void func4(void) > +{ > + for (int i = 0; i < LOOP; i++) > + { > + unsigned long ret; > + __asm__ volatile( > + "jmp 1f\n" > + "nop\n" > + "1:\n" > + "syscall\n" > + : "=a"(ret) > + : "0"(SYS_gettid)); > + } > +} > + > +void func5(void) > +{ > + for (int i = 0; i < LOOP; i++) > + { > + unsigned long ret; > + __asm__ volatile( > + "jmp 1f\n" > + "nop;nop;nop;nop;nop;nop;nop;nop\n" > + "1:\n" > + "syscall\n" > + : "=a"(ret) > + : "0"(SYS_gettid)); > + } > +} > + > + > +unsigned long long measure(void (*f)(void)) > +{ > + unsigned long long start = rdtscp(); > + (*f)(); > + unsigned long long end = rdtscp(); > + return end - start; > +} > + > +int main(int argc, char** argv) > +{ > + printf("measuring syscall func\n"); > + unsigned long long time0 = measure(&func0); > + > + printf("measuring syscall instruction\n"); > + unsigned long long time1 = measure(&func1); > + > + printf("measuring syscall + nop * 3\n"); > + unsigned long long time2 = measure(&func2); > + > + printf("measuring syscall + nop * 10\n"); > + unsigned long long time3 = measure(&func3); > + > + printf("measuring jmp + nop + syscall\n"); > + unsigned long long time4 = measure(&func4); > + > + printf("measuring jmp + nop * 8 + syscall\n"); > + unsigned long long time5 = measure(&func5); > + > + printf("\tfunc\tinst\tnop*3\tnop*10\tjmp+nop\tjmp+nop*8\n"); > + printf("ratio\t%3.2f\t%3.2f\t%3.2f\t%3.2f\t%3.2f\t%3.2f\n", > + time0 * 100.0/time1, > + time1 * 100.0/time1, > + time2 * 100.0/time1, > + time3 * 100.0/time1, > + time4 * 100.0/time1, > + time5 * 
100.0/time1); > +}
diff --git a/benchtests/bench-nop.c b/benchtests/bench-nop.c new file mode 100644 index 0000000000..bb98b3d371 --- /dev/null +++ b/benchtests/bench-nop.c @@ -0,0 +1,128 @@ +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> + +static inline unsigned long long rdtscp(void) +{ + unsigned int aux; + unsigned long long now = __builtin_ia32_rdtscp (&aux); + return now; +} + +//#define LOOP 10000000 +#define LOOP 1000000 + +void func0(void) +{ + for (int i = 0; i < LOOP; i++) + { + unsigned long ret = syscall(SYS_gettid); + } +} + +void func1(void) +{ + for (int i = 0; i < LOOP; i++) + { + unsigned long ret; + __asm__ volatile( + "syscall\n" + : "=a"(ret) + : "0"(SYS_gettid)); + } +} + +void func2(void) +{ + for (int i = 0; i < LOOP; i++) + { + unsigned long ret; + __asm__ volatile( + "syscall\n" + "nop;nop;nop\n" + : "=a"(ret) + : "0"(SYS_gettid)); + } +} + +void func3(void) +{ + for (int i = 0; i < LOOP; i++) + { + unsigned long ret; + __asm__ volatile( + "syscall\n" + "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop\n" + : "=a"(ret) + : "0"(SYS_gettid)); + } +} + +void func4(void) +{ + for (int i = 0; i < LOOP; i++) + { + unsigned long ret; + __asm__ volatile( + "jmp 1f\n" + "nop\n" + "1:\n" + "syscall\n" + : "=a"(ret) + : "0"(SYS_gettid)); + } +} + +void func5(void) +{ + for (int i = 0; i < LOOP; i++) + { + unsigned long ret; + __asm__ volatile( + "jmp 1f\n" + "nop;nop;nop;nop;nop;nop;nop;nop\n" + "1:\n" + "syscall\n" + : "=a"(ret) + : "0"(SYS_gettid)); + } +} + + +unsigned long long measure(void (*f)(void)) +{ + unsigned long long start = rdtscp(); + (*f)(); + unsigned long long end = rdtscp(); + return end - start; +} + +int main(int argc, char** argv) +{ + printf("measuring syscall func\n"); + unsigned long long time0 = measure(&func0); + + printf("measuring syscall instruction\n"); + unsigned long long time1 = measure(&func1); + + printf("measuring syscall + nop * 3\n"); + unsigned long long time2 = measure(&func2); + + printf("measuring syscall + nop * 
10\n"); + unsigned long long time3 = measure(&func3); + + printf("measuring jmp + nop + syscall\n"); + unsigned long long time4 = measure(&func4); + + printf("measuring jmp + nop * 8 + syscall\n"); + unsigned long long time5 = measure(&func5); + + printf("\tfunc\tinst\tnop*3\tnop*10\tjmp+nop\tjmp+nop*8\n"); + printf("ratio\t%3.2f\t%3.2f\t%3.2f\t%3.2f\t%3.2f\t%3.2f\n", + time0 * 100.0/time1, + time1 * 100.0/time1, + time2 * 100.0/time1, + time3 * 100.0/time1, + time4 * 100.0/time1, + time5 * 100.0/time1); +}
This is a simple benchmark to measure function/nop effects. OS noise is still bigger, even when the process is pinned to a CPU and made a real-time (rt) process. $ sudo chrt -r 99 taskset 1 ./a.out Signed-off-by: Isaku Yamahata <isaku.yamahata@gmail.com> --- benchtests/bench-nop.c | 128 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 benchtests/bench-nop.c