From patchwork Thu Oct 11 14:02:07 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Daniel Borkmann X-Patchwork-Id: 982498 X-Patchwork-Delegate: bpf@iogearbox.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=iogearbox.net Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 42WCMd18zkz9s9N for ; Fri, 12 Oct 2018 01:02:21 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728469AbeJKV3i (ORCPT ); Thu, 11 Oct 2018 17:29:38 -0400 Received: from www62.your-server.de ([213.133.104.62]:43510 "EHLO www62.your-server.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728399AbeJKV3i (ORCPT ); Thu, 11 Oct 2018 17:29:38 -0400 Received: from [62.203.87.61] (helo=localhost) by www62.your-server.de with esmtpsa (TLSv1.2:DHE-RSA-AES256-GCM-SHA384:256) (Exim 4.89_1) (envelope-from ) id 1gAbXc-0005Cp-W1; Thu, 11 Oct 2018 16:02:17 +0200 From: Daniel Borkmann To: alexei.starovoitov@gmail.com Cc: netdev@vger.kernel.org, Daniel Borkmann Subject: [PATCH bpf-next 2/2] bpf, libbpf: simplify perf RB walk and do incremental updates Date: Thu, 11 Oct 2018 16:02:07 +0200 Message-Id: <20181011140207.27602-3-daniel@iogearbox.net> X-Mailer: git-send-email 2.9.5 In-Reply-To: <20181011140207.27602-1-daniel@iogearbox.net> References: <20181011140207.27602-1-daniel@iogearbox.net> X-Authenticated-Sender: daniel@iogearbox.net X-Virus-Scanned: Clear (ClamAV 0.100.1/25027/Thu Oct 11 14:54:19 2018) Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Clean up and improve bpf_perf_event_read_simple() ring walk a bit to use similar tail update scheme as in perf and bcc allowing the kernel to make forward progress not only after full timely walk. Also few other improvements to use realloc() instead of free() and malloc() combination and for the callback use proper perf_event_header instead of void pointer, so that real applications can use container_of() macro with proper type checking. Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map_perf_ring.c | 10 ++-- tools/lib/bpf/libbpf.c | 77 +++++++++++++---------------- tools/lib/bpf/libbpf.h | 14 +++--- tools/testing/selftests/bpf/trace_helpers.c | 7 +-- 4 files changed, 53 insertions(+), 55 deletions(-) diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 6d41323..bdaf406 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -50,15 +50,17 @@ static void int_exit(int signo) stop = true; } -static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv) +static enum bpf_perf_event_ret +print_bpf_output(struct perf_event_header *event, void *private_data) { - struct event_ring_info *ring = priv; - struct perf_event_sample *e = event; + struct perf_event_sample *e = container_of(event, struct perf_event_sample, + header); + struct event_ring_info *ring = private_data; struct { struct perf_event_header header; __u64 id; __u64 lost; - } *lost = event; + } *lost = (typeof(lost))event; if (json_output) { jsonw_start_object(json_wtr); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1ac8856..35f4a77 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2448,58 +2448,51 @@ static void bpf_perf_write_tail(struct perf_event_mmap_page *header, } enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mem, unsigned long size, - unsigned long page_size, void **buf, size_t *buf_len, - bpf_perf_event_print_t fn, void *priv) -{ - struct perf_event_mmap_page *header = mem; - __u64 data_head = bpf_perf_read_head(header); - __u64 data_tail = header->data_tail; - int ret = LIBBPF_PERF_EVENT_ERROR; - void *base, *begin, *end; - - if (data_head == data_tail) - return LIBBPF_PERF_EVENT_CONT; - - base = ((char *)header) + page_size; - - begin = base + data_tail % size; - end = base + data_head % size; - - while (begin != end) { - struct perf_event_header *ehdr; - - ehdr = begin; - if (begin + ehdr->size > base + size) { - long len = base + size - begin; - - if (*buf_len < ehdr->size) { - free(*buf); - *buf = malloc(ehdr->size); - if (!*buf) { +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data) +{ + struct perf_event_mmap_page *header = mmap_mem; + void *base = ((__u8 *)header) + page_size; + int ret = LIBBPF_PERF_EVENT_CONT; + struct perf_event_header *ehdr; + __u64 data_head, data_tail; + size_t ehdr_size; + + for (data_tail = header->data_tail, + data_head = bpf_perf_read_head(header); + data_head != data_tail; + data_head = bpf_perf_read_head(header)) { + ehdr = base + (data_tail & (mmap_size - 1)); + ehdr_size = ehdr->size; + if (((void *)ehdr) + ehdr_size > base + mmap_size) { + void *copy_start = ehdr; + size_t len_first = base + mmap_size - copy_start; + size_t len_secnd = ehdr_size - len_first; + + if (*copy_size < ehdr_size) { + void *ptr = realloc(*copy_mem, ehdr_size); + + if (!ptr) { ret = LIBBPF_PERF_EVENT_ERROR; break; } - *buf_len = ehdr->size; + *copy_mem = ptr; + *copy_size = ehdr_size; } - memcpy(*buf, begin, len); - memcpy(*buf + len, base, ehdr->size - len); - ehdr = (void *)*buf; - begin = base + ehdr->size - len; - } else if (begin + ehdr->size == base + size) { - begin = base; - } else { - begin += ehdr->size; + memcpy(*copy_mem, copy_start, len_first); + memcpy(*copy_mem + len_first, base, len_secnd); + ehdr = *copy_mem; } - ret = fn(ehdr, priv); + ret = fn(ehdr, private_data); + + data_tail += ehdr_size; + bpf_perf_write_tail(header, data_tail); if (ret != LIBBPF_PERF_EVENT_CONT) break; - - data_tail += ehdr->size; } - bpf_perf_write_tail(header, data_tail); return ret; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 8af8d36..16b5c95 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -284,12 +284,14 @@ enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_CONT = -2, }; -typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event, - void *priv); -int bpf_perf_event_read_simple(void *mem, unsigned long size, - unsigned long page_size, - void **buf, size_t *buf_len, - bpf_perf_event_print_t fn, void *priv); +struct perf_event_header; +typedef enum bpf_perf_event_ret + (*bpf_perf_event_print_t)(struct perf_event_header *hdr, + void *private_data); +enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data); struct nlattr; typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index cabe2a3..1764093 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -124,10 +124,11 @@ struct perf_event_sample { char data[]; }; -static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv) +static enum bpf_perf_event_ret +bpf_perf_event_print(struct perf_event_header *hdr, void *private_data) { - struct perf_event_sample *e = event; - perf_event_print_fn fn = priv; + struct perf_event_sample *e = (struct perf_event_sample *)hdr; + perf_event_print_fn fn = private_data; int ret; if (e->header.type == PERF_RECORD_SAMPLE) {