[RFC,11/11] benchtests: simple benchmark to measure nop effects
diff mbox series

Message ID 63d80c2247003fa0116dbcd0188153d79793616f.1568219400.git.isaku.yamahata@gmail.com
State New
Headers show
Series
  • Library OS support
Related show

Commit Message

Isaku Yamahata Sept. 11, 2019, 9:04 p.m. UTC
This is a simple benchmark to measure function/nop effects.
OS noise remains significant even when the process is pinned to a CPU
and run as a real-time (rt) process.

$ sudo chrt -r 99 taskset 1 ./a.out

Signed-off-by: Isaku Yamahata <isaku.yamahata@gmail.com>
---
 benchtests/bench-nop.c | 128 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)
 create mode 100644 benchtests/bench-nop.c

Comments

Patrick McGehearty Sept. 11, 2019, 9:35 p.m. UTC | #1
I believe this segment
+     time0 * 100.0/time1,
+     time1 * 100.0/time1,
+     time2 * 100.0/time1,
+     time3 * 100.0/time1,
+     time4 * 100.0/time1,
+     time5 * 100.0/time1);
needs to change to:
+     time0 * 100.0/time0,
+     time1 * 100.0/time1,
+     time2 * 100.0/time2,
+     time3 * 100.0/time3,
+     time4 * 100.0/time4,
+     time5 * 100.0/time5);

Also, I would recommend removing or delaying all printf statements
that occur before your timing measurements. In my experience, when
doing delicate timing experiments, printf can trigger asynchronous I/O
activity, which adds significant noise to the measurements.

There may be other opportunities to refine the measurements,
including running each measurement experiment several times
and then reporting the min, max, median, and mean.

- patrick mcgehearty


On 9/11/2019 4:04 PM, Isaku Yamahata wrote:
> This is simple benchmark to measure function/nop effects.
> OS noise is bigger even if it's pinned down to cpu and to make it rt
> process.
>
> $ sudo chrt -r 99 taskset 1 ./a.out
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@gmail.com>
> ---
>   benchtests/bench-nop.c | 128 +++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 128 insertions(+)
>   create mode 100644 benchtests/bench-nop.c
>
> diff --git a/benchtests/bench-nop.c b/benchtests/bench-nop.c
> new file mode 100644
> index 0000000000..bb98b3d371
> --- /dev/null
> +++ b/benchtests/bench-nop.c
> @@ -0,0 +1,128 @@
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <sys/syscall.h>
> +
> +static inline unsigned long long rdtscp(void)
> +{
> +  unsigned int aux;
> +  unsigned long long now = __builtin_ia32_rdtscp (&aux);
> +  return now;
> +}
> +
> +//#define LOOP 10000000
> +#define LOOP 1000000
> +
> +void func0(void)
> +{
> +  for (int i = 0; i < LOOP; i++)
> +    {
> +      unsigned long ret = syscall(SYS_gettid);
> +    }
> +}
> +
> +void func1(void)
> +{
> +  for (int i = 0; i < LOOP; i++)
> +    {
> +      unsigned long ret;
> +      __asm__ volatile(
> +		       "syscall\n"
> +		       : "=a"(ret)
> +		       : "0"(SYS_gettid));
> +    }
> +}
> +
> +void func2(void)
> +{
> +  for (int i = 0; i < LOOP; i++)
> +    {
> +      unsigned long ret;
> +      __asm__ volatile(
> +		       "syscall\n"
> +		       "nop;nop;nop\n"
> +		       : "=a"(ret)
> +		       : "0"(SYS_gettid));
> +    }
> +}
> +
> +void func3(void)
> +{
> +  for (int i = 0; i < LOOP; i++)
> +    {
> +      unsigned long ret;
> +      __asm__ volatile(
> +		       "syscall\n"
> +		       "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop\n"
> +		       : "=a"(ret)
> +		       : "0"(SYS_gettid));
> +    }
> +}
> +
> +void func4(void)
> +{
> +  for (int i = 0; i < LOOP; i++)
> +    {
> +      unsigned long ret;
> +      __asm__ volatile(
> +		       "jmp 1f\n"
> +		       "nop\n"
> +		       "1:\n"
> +		       "syscall\n"
> +		       : "=a"(ret)
> +		       : "0"(SYS_gettid));
> +    }
> +}
> +
> +void func5(void)
> +{
> +  for (int i = 0; i < LOOP; i++)
> +    {
> +      unsigned long ret;
> +      __asm__ volatile(
> +		       "jmp 1f\n"
> +		       "nop;nop;nop;nop;nop;nop;nop;nop\n"
> +		       "1:\n"
> +		       "syscall\n"
> +		       : "=a"(ret)
> +		       : "0"(SYS_gettid));
> +    }
> +}
> +
> +
> +unsigned long long measure(void (*f)(void))
> +{
> +  unsigned long long start = rdtscp();
> +  (*f)();
> +  unsigned long long end = rdtscp();
> +  return end - start;
> +}
> +
> +int main(int argc, char** argv)
> +{
> +  printf("measuring syscall func\n");
> +  unsigned long long time0 = measure(&func0);
> +
> +  printf("measuring syscall instruction\n");
> +  unsigned long long time1 = measure(&func1);
> +
> +  printf("measuring syscall + nop * 3\n");
> +  unsigned long long time2 = measure(&func2);
> +
> +  printf("measuring syscall + nop * 10\n");
> +  unsigned long long time3 = measure(&func3);
> +
> +  printf("measuring jmp + nop + syscall\n");
> +  unsigned long long time4 = measure(&func4);
> +
> +  printf("measuring jmp + nop * 8 + syscall\n");
> +  unsigned long long time5 = measure(&func5);
> +
> +  printf("\tfunc\tinst\tnop*3\tnop*10\tjmp+nop\tjmp+nop*8\n");
> +  printf("ratio\t%3.2f\t%3.2f\t%3.2f\t%3.2f\t%3.2f\t%3.2f\n",
> +	 time0 * 100.0/time1,
> +	 time1 * 100.0/time1,
> +	 time2 * 100.0/time1,
> +	 time3 * 100.0/time1,
> +	 time4 * 100.0/time1,
> +	 time5 * 100.0/time1);
> +}

Patch
diff mbox series

diff --git a/benchtests/bench-nop.c b/benchtests/bench-nop.c
new file mode 100644
index 0000000000..bb98b3d371
--- /dev/null
+++ b/benchtests/bench-nop.c
@@ -0,0 +1,128 @@ 
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline unsigned long long rdtscp(void)
+{
+  unsigned int aux;
+  unsigned long long now = __builtin_ia32_rdtscp (&aux);
+  return now;
+}
+
+//#define LOOP 10000000
+#define LOOP 1000000
+
+void func0(void)
+{
+  for (int i = 0; i < LOOP; i++)
+    {
+      unsigned long ret = syscall(SYS_gettid);
+    }
+}
+
+void func1(void)
+{
+  for (int i = 0; i < LOOP; i++)
+    {
+      unsigned long ret;
+      __asm__ volatile(
+		       "syscall\n"
+		       : "=a"(ret)
+		       : "0"(SYS_gettid));
+    }
+}
+
+void func2(void)
+{
+  for (int i = 0; i < LOOP; i++)
+    {
+      unsigned long ret;
+      __asm__ volatile(
+		       "syscall\n"
+		       "nop;nop;nop\n"
+		       : "=a"(ret)
+		       : "0"(SYS_gettid));
+    }
+}
+
+void func3(void)
+{
+  for (int i = 0; i < LOOP; i++)
+    {
+      unsigned long ret;
+      __asm__ volatile(
+		       "syscall\n"
+		       "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop\n"
+		       : "=a"(ret)
+		       : "0"(SYS_gettid));
+    }
+}
+
+void func4(void)
+{
+  for (int i = 0; i < LOOP; i++)
+    {
+      unsigned long ret;
+      __asm__ volatile(
+		       "jmp 1f\n"
+		       "nop\n"
+		       "1:\n"
+		       "syscall\n"
+		       : "=a"(ret)
+		       : "0"(SYS_gettid));
+    }
+}
+
+void func5(void)
+{
+  for (int i = 0; i < LOOP; i++)
+    {
+      unsigned long ret;
+      __asm__ volatile(
+		       "jmp 1f\n"
+		       "nop;nop;nop;nop;nop;nop;nop;nop\n"
+		       "1:\n"
+		       "syscall\n"
+		       : "=a"(ret)
+		       : "0"(SYS_gettid));
+    }
+}
+
+
+unsigned long long measure(void (*f)(void))
+{
+  unsigned long long start = rdtscp();
+  (*f)();
+  unsigned long long end = rdtscp();
+  return end - start;
+}
+
+int main(int argc, char** argv)
+{
+  printf("measuring syscall func\n");
+  unsigned long long time0 = measure(&func0);
+
+  printf("measuring syscall instruction\n");
+  unsigned long long time1 = measure(&func1);
+
+  printf("measuring syscall + nop * 3\n");
+  unsigned long long time2 = measure(&func2);
+
+  printf("measuring syscall + nop * 10\n");
+  unsigned long long time3 = measure(&func3);
+
+  printf("measuring jmp + nop + syscall\n");
+  unsigned long long time4 = measure(&func4);
+
+  printf("measuring jmp + nop * 8 + syscall\n");
+  unsigned long long time5 = measure(&func5);
+
+  printf("\tfunc\tinst\tnop*3\tnop*10\tjmp+nop\tjmp+nop*8\n");
+  printf("ratio\t%3.2f\t%3.2f\t%3.2f\t%3.2f\t%3.2f\t%3.2f\n",
+	 time0 * 100.0/time1,
+	 time1 * 100.0/time1,
+	 time2 * 100.0/time1,
+	 time3 * 100.0/time1,
+	 time4 * 100.0/time1,
+	 time5 * 100.0/time1);
+}