diff mbox

add concurrent malloc benchmark

Message ID 54F6E0CB.9090805@web.de
State New
Headers show

Commit Message

Leonhard Holz March 4, 2015, 10:39 a.m. UTC
The current malloc benchtest does use multiple threads, but they work on 
independent block lists so that every thread uses its own arena only. The 
following patch adds a version of the benchmark with all threads working on the 
same block list, so that it is likely that a thread free()s a block in an arena of 
a different thread and thereby causes lock contention on that arena. Therefore 
the performance of the malloc locking mechanism is included in the measuring.

Unfortunately the access to an entry in the shared block list has to be protected 
by a mutex. The time taken for acquiring the mutex is included in the measured 
time per iteration, so that the iteration time of the concurrent malloc benchmark 
is not directly comparable to the time of the per-thread benchmark.

	* benchtests/bench-malloc-concurrent.c: New file.
	* benchtests/Makefile: Add bench-malloc-concurrent.
diff mbox

Patch

diff --git a/benchtests/Makefile b/benchtests/Makefile
index 08603a2..343e56f 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -44,11 +44,12 @@  benchset := $(string-bench-all) $(stdlib-bench)
  CFLAGS-bench-ffs.c += -fno-builtin
  CFLAGS-bench-ffsll.c += -fno-builtin

-bench-malloc := malloc-thread
+bench-malloc := malloc-thread malloc-concurrent

  $(addprefix $(objpfx)bench-,$(bench-math)): $(libm)
  $(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread-library)
  $(objpfx)bench-malloc-thread: $(shared-thread-library)
+$(objpfx)bench-malloc-concurrent: $(shared-thread-library)

  ^L

@@ -116,10 +117,11 @@  bench-set: $(binaries-benchset)
         done

  bench-malloc: $(binaries-bench-malloc)
-       run=$(objpfx)bench-malloc-thread; \
-       for thr in 1 8 16 32; do \
-         echo "Running $${run} $${thr}"; \
-         $(run-bench) $${thr} > $${run}-$${thr}.out; \
+       for run in $^; do \
+         for thr in 1 8 16 32; do \
+           echo "Running $${run} $${thr}"; \
+           $(run-bench) $${thr} > $${run}-$${thr}.out; \
+         done; \
         done

  # Build and execute the benchmark functions.  This target generates JSON
diff --git a/benchtests/bench-malloc-concurrent.c 
b/benchtests/bench-malloc-concurrent.c
index e69de29..75bd586 100644
--- a/benchtests/bench-malloc-concurrent.c
+++ b/benchtests/bench-malloc-concurrent.c
@@ -0,0 +1,315 @@ 
+/* Benchmark malloc and free functions.
+   Copyright (C) 2013-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <math.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#include "bench-timing.h"
+#include "json-lib.h"
+
+/* Benchmark duration in seconds.  */
+#define BENCHMARK_DURATION     60
+#define RAND_SEED              88
+
+#ifndef NUM_THREADS
+# define NUM_THREADS 1
+#endif
+
+/* Maximum memory that can be allocated at any one time is:
+
+   NUM_THREADS * WORKING_SET_SIZE * MAX_ALLOCATION_SIZE
+
+   However due to the distribution of the random block sizes
+   the typical amount allocated will be much smaller.  */
+#define WORKING_SET_SIZE       1024
+
+#define MIN_ALLOCATION_SIZE    4
+#define MAX_ALLOCATION_SIZE    32768
+
+/* Get a random block size with an inverse square distribution.  */
+static unsigned int
+get_block_size (unsigned int rand_data)
+{
+  /* Inverse square.  */
+  const float exponent = -2;
+  /* Minimum value of distribution.  */
+  const float dist_min = MIN_ALLOCATION_SIZE;
+  /* Maximum value of distribution.  */
+  const float dist_max = MAX_ALLOCATION_SIZE;
+
+  /* Inverse transform sampling: map RAND_DATA (0..RAND_MAX) onto
+     [dist_min, dist_max] following the x^exponent density, so small
+     block sizes are far more likely than large ones.  */
+  float min_pow = powf (dist_min, exponent + 1);
+  float max_pow = powf (dist_max, exponent + 1);
+
+  float r = (float) rand_data / RAND_MAX;
+
+  return (unsigned int) powf ((max_pow - min_pow) * r + min_pow,
+                             1 / (exponent + 1));
+}
+
+/* One mutex per slot of the shared block list; each protects the
+   free/malloc pair on that slot against concurrent threads.  */
+static pthread_mutex_t index_lock[WORKING_SET_SIZE];
+
+/* Initialize every per-slot lock with default mutex attributes.  */
+static void
+init_index_locks (void)
+{
+  for (size_t i = 0; i < WORKING_SET_SIZE; i++)
+    pthread_mutex_init (&index_lock[i], 0);
+}
+
+#define NUM_BLOCK_SIZES        8000
+#define NUM_OFFSETS    ((WORKING_SET_SIZE) * 4)
+
+/* Precomputed block sizes and slot indices, so rand () is never
+   called on the timed benchmark path.  */
+static unsigned int random_block_sizes[NUM_BLOCK_SIZES];
+static unsigned int random_offsets[NUM_OFFSETS];
+
+/* Fill both lookup tables from rand ().
+   NOTE(review): srand (RAND_SEED) is never called, so rand () uses
+   its default seed of 1 and RAND_SEED is only echoed in the JSON
+   output -- confirm whether seeding was intended.  */
+static void
+init_random_values (void)
+{
+  for (size_t i = 0; i < NUM_BLOCK_SIZES; i++)
+    random_block_sizes[i] = get_block_size (rand ());
+
+  for (size_t i = 0; i < NUM_OFFSETS; i++)
+    random_offsets[i] = rand () % WORKING_SET_SIZE;
+}
+
+/* Return the next precomputed block size, cycling through the table.
+   *STATE is the caller's private cursor into random_block_sizes.  */
+static unsigned int
+get_random_block_size (unsigned int *state)
+{
+  unsigned int idx = *state;
+
+  /* Advance, wrapping one element before the end so IDX stays in
+     bounds.  */
+  if (idx >= NUM_BLOCK_SIZES - 1)
+    idx = 0;
+  else
+    idx++;
+
+  *state = idx;
+
+  return random_block_sizes[idx];
+}
+
+/* Return the next precomputed slot index into the shared working set,
+   cycling through random_offsets.  *STATE is the caller's private
+   cursor, so each thread walks the table independently.  */
+static unsigned int
+get_random_offset (unsigned int *state)
+{
+  unsigned int idx = *state;
+
+  /* Advance, wrapping one element before the end so IDX stays in
+     bounds.  */
+  if (idx >= NUM_OFFSETS - 1)
+    idx = 0;
+  else
+    idx++;
+
+  *state = idx;
+
+  return random_offsets[idx];
+}
+
+/* Set by the SIGALRM handler to terminate the benchmark loops.
+   NOTE(review): bool is used but <stdbool.h> is not included in this
+   file -- presumably pulled in via bench-timing.h; verify.  */
+static volatile bool timeout;
+
+/* SIGALRM handler; SIGNUM is unused.  */
+static void
+alarm_handler (int signum)
+{
+  timeout = true;
+}
+
+/* Allocate and free blocks in a random order.  PTR_ARR is the shared
+   working set; START_OFFSET staggers each thread's starting cursor so
+   threads do not immediately collide on the same slots.  Returns the
+   number of free/malloc iterations completed before timeout.  */
+static size_t
+malloc_benchmark_loop (void **ptr_arr, size_t start_offset)
+{
+  unsigned int offset_state = start_offset, block_state = 0;
+  size_t iters = 0;
+
+  while (!timeout)
+    {
+      unsigned int next_idx = get_random_offset (&offset_state);
+      unsigned int next_block = get_random_block_size (&block_state);
+
+      /* Serialize access to this slot only; the lock/unlock cost is
+         deliberately part of the measured iteration time.  */
+      pthread_mutex_lock (&index_lock[next_idx]);
+      free (ptr_arr[next_idx]);
+      ptr_arr[next_idx] = malloc (next_block);
+      pthread_mutex_unlock (&index_lock[next_idx]);
+
+      iters++;
+    }
+
+  return iters;
+}
+
+/* Per-thread in/out parameters for benchmark_thread.  */
+struct thread_args
+{
+  size_t iters;		/* Out: iterations completed.  */
+  size_t start_offset;	/* In: starting cursor into random_offsets.  */
+  void **working_set;	/* In: shared block list.  */
+  timing_t elapsed;	/* Out: time spent in the benchmark loop.  */
+};
+
+/* Thread start routine: time the benchmark loop and report iteration
+   count and elapsed time back through ARG.  */
+static void *
+benchmark_thread (void *arg)
+{
+  struct thread_args *args = (struct thread_args *) arg;
+  size_t iters;
+  /* NOTE(review): declared void * although it holds a void **; it is
+     implicitly converted back at the call below.  */
+  void *thread_set = args->working_set;
+  timing_t start, stop;
+
+  TIMING_NOW (start);
+  iters = malloc_benchmark_loop (thread_set, args->start_offset);
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (args->elapsed, start, stop);
+  args->iters = iters;
+
+  return NULL;
+}
+
+/* Run the benchmark with NUM_THREADS threads sharing a single working
+   set.  Returns the summed per-thread elapsed time and stores the
+   total iteration count in *ITERS.  */
+static timing_t
+do_benchmark (size_t num_threads, size_t *iters)
+{
+  timing_t elapsed = 0;
+
+  if (num_threads == 1)
+    {
+      /* Single-threaded fast path: no threads are spawned, though the
+         per-slot locking inside the loop is still exercised.  */
+      timing_t start, stop;
+      void *working_set[WORKING_SET_SIZE];
+
+      memset (working_set, 0, sizeof (working_set));
+
+      TIMING_NOW (start);
+      *iters = malloc_benchmark_loop (working_set, 0);
+      TIMING_NOW (stop);
+
+      TIMING_DIFF (elapsed, start, stop);
+    }
+  else
+    {
+      struct thread_args args[num_threads];
+      void *working_set[WORKING_SET_SIZE];
+      pthread_t threads[num_threads];
+
+      memset (working_set, 0, sizeof (working_set));
+
+      *iters = 0;
+
+      /* Spread the starting offsets evenly across the working set.
+         NOTE(review): pthread_create's return value is not checked; a
+         failed create would lead to joining an uninitialized handle.  */
+      for (size_t i = 0; i < num_threads; i++)
+       {
+         args[i].working_set = working_set;
+         args[i].start_offset = (WORKING_SET_SIZE / num_threads) * i;
+         pthread_create(&threads[i], NULL, benchmark_thread, &args[i]);
+       }
+
+      /* Accumulate each thread's elapsed time and iteration count.  */
+      for (size_t i = 0; i < num_threads; i++)
+       {
+         pthread_join(threads[i], NULL);
+         TIMING_ACCUM (elapsed, args[i].elapsed);
+         *iters += args[i].iters;
+       }
+    }
+  return elapsed;
+}
+
+/* Print the expected command line to stderr and exit with failure.  */
+static void usage(const char *name)
+{
+  fprintf (stderr, "%s: <num_threads>\n", name);
+  exit (1);
+}
+
+/* Parse the optional thread count, run the concurrent malloc
+   benchmark for BENCHMARK_DURATION seconds and emit the results as a
+   JSON document on stdout.  */
+int
+main (int argc, char **argv)
+{
+  timing_t cur;
+  size_t iters = 0, num_threads = 1;
+  unsigned long res;
+  json_ctx_t json_ctx;
+  double d_total_s, d_total_i;
+  struct sigaction act;
+
+  /* Single optional argument: the number of benchmark threads.  */
+  if (argc == 1)
+    num_threads = 1;
+  else if (argc == 2)
+    {
+      long ret;
+
+      errno = 0;
+      ret = strtol(argv[1], NULL, 10);
+
+      /* Reject non-numeric, out-of-range or zero values.
+         NOTE(review): negative values pass this check and convert to
+         a huge size_t -- confirm whether they should be rejected.  */
+      if (errno || ret == 0)
+       usage(argv[0]);
+
+      num_threads = ret;
+    }
+  else
+    usage(argv[0]);
+
+  init_index_locks ();
+  init_random_values ();
+
+  json_init (&json_ctx, 0, stdout);
+
+  json_document_begin (&json_ctx);
+
+  json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
+
+  json_attr_object_begin (&json_ctx, "functions");
+
+  json_attr_object_begin (&json_ctx, "malloc");
+
+  json_attr_object_begin (&json_ctx, "");
+
+  /* RES is only needed by TIMING_INIT; unused afterwards.  */
+  TIMING_INIT (res);
+
+  (void) res;
+
+  /* Arrange for the timeout flag to stop the benchmark loops after
+     BENCHMARK_DURATION seconds.  */
+  memset (&act, 0, sizeof (act));
+  act.sa_handler = &alarm_handler;
+
+  sigaction (SIGALRM, &act, NULL);
+
+  alarm (BENCHMARK_DURATION);
+
+  cur = do_benchmark (num_threads, &iters);
+
+  /* Peak memory usage of the whole process, reported as max_rss.  */
+  struct rusage usage;
+  getrusage(RUSAGE_SELF, &usage);
+
+  d_total_s = cur;
+  d_total_i = iters;
+
+  json_attr_double (&json_ctx, "duration", d_total_s);
+  json_attr_double (&json_ctx, "iterations", d_total_i);
+  json_attr_double (&json_ctx, "time_per_iteration", d_total_s / d_total_i);
+  json_attr_double (&json_ctx, "max_rss", usage.ru_maxrss);
+
+  json_attr_double (&json_ctx, "threads", num_threads);
+  json_attr_double (&json_ctx, "min_size", MIN_ALLOCATION_SIZE);
+  json_attr_double (&json_ctx, "max_size", MAX_ALLOCATION_SIZE);
+  /* NOTE(review): RAND_SEED is reported here but srand is never
+     called, so the effective seed is the rand () default of 1.  */
+  json_attr_double (&json_ctx, "random_seed", RAND_SEED);
+
+  json_attr_object_end (&json_ctx);
+
+  json_attr_object_end (&json_ctx);
+
+  json_attr_object_end (&json_ctx);
+
+  json_document_end (&json_ctx);
+
+  return 0;
+}