From patchwork Wed Jun 17 15:28:27 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wilco X-Patchwork-Id: 485487 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 8D8C5140284 for ; Thu, 18 Jun 2015 01:28:43 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=sourceware.org header.i=@sourceware.org header.b=kDforj2v; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:mime-version :content-type:content-transfer-encoding; q=dns; s=default; b=KoR 4o7IsS4PUxxsTcKEPcg5UGZCHaWKhc1ft9aR6tNHE2Xhj9chHzFPEn5OCNVFSCLS Tc/tlO2UL1S49WMTdeqFejTpK88nd1mHfsZu9oXwy2iV6zIK9out3Mkc7Zcynvl5 TjdInUPiUOBqcfDyDzVZ1OLREv54dmYJb3S5Ojvw= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:mime-version :content-type:content-transfer-encoding; s=default; bh=oh2YTtY+d VtJOD/7oVl6tmXG6GE=; b=kDforj2v/sdQ/jFk15BB+qgJ/XbAcI7G0A2IuArgy tycSXImgrgbyGll5B13T+XhEm/PE8mbu950hF6+iQGxxTkDCqV6RgcAt/DImXss6 6XD8rUkB/rrFsdo38katyTt6SPsIiEk7Wv7cRk48zr6yBMTI938h4PICT0fT/VNf i4= Received: (qmail 93652 invoked by alias); 17 Jun 2015 15:28:37 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 93640 invoked by uid 89); 17 Jun 2015 15:28:36 
-0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-0.9 required=5.0 tests=AWL, BAYES_20, SPF_PASS autolearn=ham version=3.3.2 X-HELO: eu-smtp-delivery-143.mimecast.com From: "Wilco Dijkstra" To: "GNU C Library" Subject: [PATCH] Add math-inline benchmark Date: Wed, 17 Jun 2015 16:28:27 +0100 Message-ID: <001c01d0a912$42357710$c6a06530$@com> MIME-Version: 1.0 X-MC-Unique: IHpp8OS2S3-mBGXu0dxWOA-1 Hi, Due to popular demand, here is a new benchmark that tests isinf, isnan, isnormal, isfinite and fpclassify. It uses 2 arrays with 1024 doubles, one with 99% finite FP numbers (10% zeroes, 10% negative) and 1% inf/NaN, the other with 50% inf and 50% NaN. Results show that using the GCC built-ins in math.h will give huge speedups by avoiding explicit calls and PLT indirection just to execute a function with 3-4 instructions. The GCC builtins have similar performance to the existing math_private inlines for __isnan, __finite and __isinf_ns. OK for commit? ChangeLog: 2015-06-17 Wilco Dijkstra * benchtests/Makefile: Add bench-math-inlines.c. * benchtests/bench-math-inlines.c: New benchmark. --- benchtests/Makefile | 14 +-- benchtests/bench-math-inlines.c | 203 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+), 6 deletions(-) create mode 100644 benchtests/bench-math-inlines.c diff --git a/benchtests/Makefile b/benchtests/Makefile index 8e615e5..3c20180 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -30,12 +30,13 @@ bench-pthread := pthread_once bench := $(bench-math) $(bench-pthread) # String function benchmarks. 
-string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \ - mempcpy memset rawmemchr stpcpy stpncpy strcasecmp strcasestr \ - strcat strchr strchrnul strcmp strcpy strcspn strlen \ - strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \ - strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \ - strcoll +string-bench := bcopy bzero math-inlines memccpy memchr memcmp memcpy memmem \ + memmove mempcpy memset rawmemchr stpcpy stpncpy strcasecmp \ + strcasestr strcat strchr strchrnul strcmp strcpy strcspn \ + strlen strncasecmp strncat strncmp strncpy strnlen strpbrk \ + strrchr strspn strstr strcpy_chk stpcpy_chk memrchr strsep \ + strtok strcoll + string-bench-all := $(string-bench) # We have to generate locales @@ -58,6 +59,7 @@ CFLAGS-bench-ffsll.c += -fno-builtin bench-malloc := malloc-thread $(addprefix $(objpfx)bench-,$(bench-math)): $(libm) +$(addprefix $(objpfx)bench-,math-inlines): $(libm) $(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread-library) $(objpfx)bench-malloc-thread: $(shared-thread-library) diff --git a/benchtests/bench-math-inlines.c b/benchtests/bench-math-inlines.c new file mode 100644 index 0000000..c21a3d3 --- /dev/null +++ b/benchtests/bench-math-inlines.c @@ -0,0 +1,203 @@ +/* Measure math inline functions. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SIZE 1024
+#define TEST_MAIN
+#define TEST_NAME "math-inlines"
+#include "bench-string.h"
+
+#include <stdlib.h>
+#include <math.h>
+#include <stdint.h>
+
+/* Define FUNC_t (P, N, ITERS): apply the predicate FUNC to P[i] * 2.0 for
+   each of the N elements of P, ITERS times over, and return the number of
+   calls that yielded nonzero.  P is volatile-qualified, so the element
+   loads cannot be elided or hoisted out of the loops.  The counters are
+   size_t so that they have the same type as the bounds they are compared
+   against.  */
+#define BOOLTEST(func)                                    \
+int                                                       \
+func ## _t (volatile double *p, size_t n, size_t iters)   \
+{                                                         \
+  size_t i, j;                                            \
+  int res = 0;                                            \
+  for (j = 0; j < iters; j++)                             \
+    for (i = 0; i < n; i++)                               \
+      if (func (p[i] * 2.0)) res++;                       \
+  return res;                                             \
+}
+
+/* Same shape as BOOLTEST, but the int result of every FUNC call is added
+   to the returned total instead of being tested for truth.  */
+#define VALUETEST(func)                                   \
+int                                                       \
+func ## _t (volatile double *p, size_t n, size_t iters)   \
+{                                                         \
+  size_t i, j;                                            \
+  int res = 0;                                            \
+  for (j = 0; j < iters; j++)                             \
+    for (i = 0; i < n; i++)                               \
+      res += func (p[i] * 2.0);                           \
+  return res;                                             \
+}
+
+/* Overlay of a double and a 64-bit unsigned integer, used to read the
+   object representation of a double.  */
+typedef union
+{
+  double value;
+  uint64_t word;
+} ieee_double_shape_type;
+
+/* Store the 64-bit representation of the double D in the integer lvalue I.  */
+#define EXTRACT_WORDS64(i,d)                              \
+do {                                                      \
+  ieee_double_shape_type gh_u;                            \
+  gh_u.value = (d);                                       \
+  (i) = gh_u.word;                                        \
+} while (0)
+
+/* Explicit inlines similar to math_private.h versions.  They are only used
+   inside this file, so they are static: that keeps their meaning the same
+   under both GNU89 and C99 inline semantics.  */
+
+/* Return nonzero if D is a NaN: with the sign bit masked off, its
+   representation is greater than 0x7ff0000000000000.  */
+static __always_inline int
+__isnan_inl (double d)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, d);
+  return (di & 0x7fffffffffffffffull) > 0x7ff0000000000000ull;
+}
+
+/* Return 0 unless X is an infinity.  Shifting the sign bit out leaves
+   exactly 0xffe0000000000000 only for +Inf and -Inf; in that case the
+   upper 32 bits of the representation are returned, converted to int.  */
+static __always_inline int
+__isinf_inl (double x)
+{
+  uint64_t ix;
+  EXTRACT_WORDS64 (ix,x);
+  if ((ix << 1) != 0xffe0000000000000ull)
+    return 0;
+  return (int)(ix >> 32);
+}
+
+/* Return nonzero if D is finite: with the sign bit masked off, its
+   representation is less than 0x7ff0000000000000.  */
+static __always_inline int
+__finite_inl (double d)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, d);
+  return (di & 0x7fffffffffffffffull) < 0x7ff0000000000000ull;
+}
+
+/* Explicit inline similar to existing math.h implementation.  */
+
+#define __isnormal_inl(X) (__fpclassify (X) == FP_NORMAL)
+#define __isnormal_inl2(X) (fpclassify (X) == FP_NORMAL)
+
+/* Test fpclassify with use of only 2 of the 5 results.  */
+
+/* Return nonzero if fpclassify reports D as FP_NAN or FP_INFINITE.  */
+static __always_inline int
+__fpclassify_test1 (double d)
+{
+  int cl = fpclassify (d);
+  return cl == FP_NAN || cl == FP_INFINITE;
+}
+
+/* Same question as __fpclassify_test1, asked through isnan and isinf.  */
+static __always_inline int
+__fpclassify_test2 (double d)
+{
+  return isnan (d) || isinf (d);
+}
+
+/* Create test functions for each possibility.  */
+
+BOOLTEST (__isnan)
+BOOLTEST (isnan)
+BOOLTEST (__isnan_inl)
+
+BOOLTEST (__isinf)
+BOOLTEST (isinf)
+BOOLTEST (__isinf_inl)
+
+BOOLTEST (__finite)
+BOOLTEST (isfinite)
+BOOLTEST (__finite_inl)
+
+BOOLTEST (isnormal)
+BOOLTEST (__isnormal_inl)
+BOOLTEST (__isnormal_inl2)
+
+VALUETEST (fpclassify)
+VALUETEST (__fpclassify)
+BOOLTEST (__fpclassify_test1)
+BOOLTEST (__fpclassify_test2)
+
+/* Register each generated function with IMPL.  NOTE(review): IMPL is not
+   defined in this file (presumably bench-string.h provides it), so the
+   meaning of its second argument cannot be confirmed from here.  */
+IMPL (isnan_t, 0)
+IMPL (__isnan_t, 1)
+IMPL (__isnan_inl_t, 2)
+IMPL (isinf_t, 3)
+IMPL (__isinf_t, 4)
+IMPL (__isinf_inl_t, 5)
+IMPL (isfinite_t, 6)
+IMPL (__finite_t, 7)
+IMPL (__finite_inl_t, 8)
+IMPL (isnormal_t, 9)
+IMPL (__isnormal_inl_t, 10)
+IMPL (__isnormal_inl2_t, 11)
+IMPL (fpclassify_t, 12)
+IMPL (__fpclassify_t, 13)
+IMPL (__fpclassify_test1_t, 14)
+IMPL (__fpclassify_test2_t, 15)
+
+/* Signature shared by every function generated by BOOLTEST/VALUETEST.  */
+typedef int (*proto_t) (volatile double *p, size_t n, size_t iters);
+
+/* Time one call of IMPL over the LEN doubles at ARR, with the iteration
+   count set to INNER_LOOP_ITERS * 10, and pass the elapsed time and that
+   iteration count to TIMING_PRINT_MEAN.  The value returned by the call
+   is discarded.  */
+static void
+do_one_test (impl_t *impl, volatile double *arr, size_t len)
+{
+  size_t iters = INNER_LOOP_ITERS * 10;
+  timing_t start, stop, cur;
+
+  TIMING_NOW (start);
+  CALL (impl, arr, len, iters);
+  TIMING_NOW (stop);
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
+}
+
+/* Benchmark inputs; filled in by test_main.  */
+static volatile double arr1[SIZE];
+static volatile double arr2[SIZE];
+
+/* Fill both input arrays, then run every registered implementation over
+   arr1 and arr2 in turn, printing one line per implementation.  */
+int
+test_main (void)
+{
+  size_t i;
+
+  test_init ();
+
+  /* Create 2 test arrays, one with 10% zeroes, 10% negative values,
+     79% positive values and 1% infinity/NaN.  The other contains
+     50% inf, 50% NaN.  */
+
+  for (i = 0; i < SIZE; i++)
+    {
+      int x = rand () & 255;
+      arr1[i] = (x < 25) ? 0.0 : ((x < 50) ? -1 : 100);
+      if (x == 255) arr1[i] = __builtin_inf ();
+      if (x == 254) arr1[i] = __builtin_nan ("0");
+      arr2[i] = (x < 128) ? __builtin_inf () : __builtin_nan ("0");
+    }
+
+  FOR_EACH_IMPL (impl, 0)
+    {
+      printf ("%20s: ", impl->name);
+      do_one_test (impl, arr1, SIZE);
+      do_one_test (impl, arr2, SIZE);
+      putchar ('\n');
+    }
+
+  /* NOTE(review): ret is not declared in this file; presumably it is
+     provided by bench-string.h -- confirm.  */
+  return ret;
+}
+
+#include "../test-skeleton.c"