diff mbox series

[bpf-next,1/2] bpf, libbpf: use proper barriers in perf RB walk

Message ID 20181011140207.27602-2-daniel@iogearbox.net
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series Two libbpf RB walk improvements | expand

Commit Message

Daniel Borkmann Oct. 11, 2018, 2:02 p.m. UTC
User proper CPU barrier instead of just a compile barrier when fetching
ring's data_head in bpf_perf_event_read_simple() which is not correct.
Also, add two small helpers bpf_perf_read_head() and bpf_perf_write_tail()
to make used barriers more obvious and a comment to what they pair to.

Fixes: d0cabbb021be ("tools: bpf: move the event reading loop to libbpf")
Fixes: 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 52 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 46 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 176cf55..1ac8856 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -20,6 +20,7 @@ 
 #include <fcntl.h>
 #include <errno.h>
 #include <asm/unistd.h>
+#include <asm/barrier.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/bpf.h>
@@ -27,6 +28,7 @@ 
 #include <linux/list.h>
 #include <linux/limits.h>
 #include <linux/perf_event.h>
+#include <linux/compiler.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
@@ -2404,18 +2406,58 @@  int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 	return 0;
 }
 
+/*
+ * Comment from kernel/events/ring_buffer.c:
+ *
+ * Since the mmap() consumer (userspace) can run on a different CPU:
+ *
+ *   kernel				user
+ *
+ *   if (LOAD ->data_tail) {		LOAD ->data_head
+ *			(A)		smp_rmb()	(C)
+ *	STORE $data			LOAD $data
+ *	smp_wmb()	(B)		smp_mb()	(D)
+ *	STORE ->data_head		STORE ->data_tail
+ *   }
+ *
+ * Where A pairs with D, and B pairs with C.
+ *
+ * In our case (A) is a control dependency that separates the load of
+ * the ->data_tail and the stores of $data. In case ->data_tail
+ * indicates there is no room in the buffer to store $data we do not.
+ *
+ * D needs to be a full barrier since it separates the data READ
+ * from the tail WRITE.
+ *
+ * For B a WMB is sufficient since it separates two WRITEs, and for C
+ * an RMB is sufficient since it separates two READs.
+ */
+static __u64 bpf_perf_read_head(struct perf_event_mmap_page *header)
+{
+	__u64 data_head = READ_ONCE(header->data_head);
+
+	rmb();
+	return data_head;
+}
+
+static void bpf_perf_write_tail(struct perf_event_mmap_page *header,
+				__u64 data_tail)
+{
+	mb();
+	header->data_tail = data_tail;
+}
+
 enum bpf_perf_event_ret
 bpf_perf_event_read_simple(void *mem, unsigned long size,
 			   unsigned long page_size, void **buf, size_t *buf_len,
 			   bpf_perf_event_print_t fn, void *priv)
 {
-	volatile struct perf_event_mmap_page *header = mem;
+	struct perf_event_mmap_page *header = mem;
+	__u64 data_head = bpf_perf_read_head(header);
 	__u64 data_tail = header->data_tail;
-	__u64 data_head = header->data_head;
 	int ret = LIBBPF_PERF_EVENT_ERROR;
 	void *base, *begin, *end;
 
-	asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
 	if (data_head == data_tail)
 		return LIBBPF_PERF_EVENT_CONT;
 
@@ -2458,8 +2500,6 @@  bpf_perf_event_read_simple(void *mem, unsigned long size,
 		data_tail += ehdr->size;
 	}
 
-	__sync_synchronize(); /* smp_mb() */
-	header->data_tail = data_tail;
-
+	bpf_perf_write_tail(header, data_tail);
 	return ret;
 }