diff mbox

[RFC,net-next,13/14] samples: bpf: example of stateful socket filtering

Message ID 1403913966-4927-14-git-send-email-ast@plumgrid.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Alexei Starovoitov June 28, 2014, 12:06 a.m. UTC
this socket filter example does:

- creates a hashtable in kernel with key 4 bytes and value 8 bytes

- populates map[6] = 0; map[17] = 0;  // 6 - tcp_proto, 17 - udp_proto

- loads eBPF program:
  r0 = skb[14 + 9]; // load one byte of ip->proto
  *(u32*)(fp - 4) = r0;
  value = bpf_map_lookup_elem(map_id, fp - 4);
  if (value)
       (*(u64*)value) += 1;

- attaches this program to eth0 raw socket

- every second user space reads map[6] and map[17] to see how many
  TCP and UDP packets were seen on eth0

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
---
 samples/bpf/.gitignore     |    1 +
 samples/bpf/Makefile       |   13 ++++
 samples/bpf/sock_example.c |  160 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 174 insertions(+)
 create mode 100644 samples/bpf/.gitignore
 create mode 100644 samples/bpf/Makefile
 create mode 100644 samples/bpf/sock_example.c

Comments

Andy Lutomirski June 28, 2014, 12:21 a.m. UTC | #1
On Fri, Jun 27, 2014 at 5:06 PM, Alexei Starovoitov <ast@plumgrid.com> wrote:
> this socket filter example does:
>
> - creates a hashtable in kernel with key 4 bytes and value 8 bytes
>
> - populates map[6] = 0; map[17] = 0;  // 6 - tcp_proto, 17 - udp_proto
>
> - loads eBPF program:
>   r0 = skb[14 + 9]; // load one byte of ip->proto
>   *(u32*)(fp - 4) = r0;
>   value = bpf_map_lookup_elem(map_id, fp - 4);
>   if (value)
>        (*(u64*)value) += 1;

In the code below, this is XADD.  Is there anything that validates
that shared things like this can only be poked at by atomic
operations?

--Andy
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexei Starovoitov June 28, 2014, 6:21 a.m. UTC | #2
On Fri, Jun 27, 2014 at 5:21 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Fri, Jun 27, 2014 at 5:06 PM, Alexei Starovoitov <ast@plumgrid.com> wrote:
>> this socket filter example does:
>>
>> - creates a hashtable in kernel with key 4 bytes and value 8 bytes
>>
>> - populates map[6] = 0; map[17] = 0;  // 6 - tcp_proto, 17 - udp_proto
>>
>> - loads eBPF program:
>>   r0 = skb[14 + 9]; // load one byte of ip->proto
>>   *(u32*)(fp - 4) = r0;
>>   value = bpf_map_lookup_elem(map_id, fp - 4);
>>   if (value)
>>        (*(u64*)value) += 1;
>
> In the code below, this is XADD.  Is there anything that validates
> that shared things like this can only be poked at by atomic
> operations?

Correct. The asm code uses xadd to increment packet stats.
It's up to the program itself to decide what it's doing.
Some programs may prefer speed over accuracy when counting,
and they will use a regular "ld, add, st" sequence instead of xadd.
The verifier checks that programs can only access a valid memory
region. The program itself needs to do something sensible with it.
Theoretically I could add a check to the verifier that shared map elements
are read-only and xadd-only, but that limits usability and is unnecessary.
We actually do have a use case where we do a regular add, since
'lock add' is too costly at high event rates.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
new file mode 100644
index 000000000000..5465c6e92a00
--- /dev/null
+++ b/samples/bpf/.gitignore
@@ -0,0 +1 @@ 
+sock_example
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
new file mode 100644
index 000000000000..95c990151644
--- /dev/null
+++ b/samples/bpf/Makefile
@@ -0,0 +1,13 @@ 
+# kbuild trick to avoid linker error. Can be omitted if a module is built.
+obj- := dummy.o
+
+# List of programs to build
+hostprogs-y := sock_example
+
+sock_example-objs := sock_example.o libbpf.o
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+# Add $(objtree)/usr/include to the header search path of both host objects
+HOSTCFLAGS_libbpf.o += -I$(objtree)/usr/include
+HOSTCFLAGS_sock_example.o += -I$(objtree)/usr/include
diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
new file mode 100644
index 000000000000..5cf091571d4f
--- /dev/null
+++ b/samples/bpf/sock_example.c
@@ -0,0 +1,160 @@ 
+/* eBPF example program:
+ * - creates a hashtable in kernel with key 4 bytes and value 8 bytes
+ *
+ * - populates map[6] = 0; map[17] = 0;  // 6 - tcp_proto, 17 - udp_proto
+ *
+ * - loads eBPF program:
+ *   r0 = skb[14 + 9]; // load one byte of ip->proto
+ *   *(u32*)(fp - 4) = r0;
+ *   value = bpf_map_lookup_elem(map_id, fp - 4);
+ *   if (value)
+ *        (*(u64*)value) += 1;
+ *
+ * - attaches this program to eth0 raw socket
+ *
+ * - every second user space reads map[6] and map[17] to see how many
+ *   TCP and UDP packets were seen on eth0
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <asm-generic/socket.h>
+#include <linux/netlink.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <linux/sockios.h>
+#include <linux/if_packet.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <linux/unistd.h>
+#include <string.h>
+#include <linux/filter.h>
+#include <stdlib.h>
+#include <arpa/inet.h>
+#include "libbpf.h"
+
+/* Open a non-blocking AF_PACKET raw socket bound to interface @name and enable
+ * promiscuous reception on it.  Returns the fd, or -1 after printing what failed. */
+static int open_raw_sock(const char *name)
+{
+	struct sockaddr_ll sll;
+	struct packet_mreq mr;
+	struct ifreq ifr;
+	int sock;
+
+	sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL));
+	if (sock < 0) {
+		printf("cannot open socket!\n");
+		return -1;
+	}
+
+	memset(&ifr, 0, sizeof(ifr));
+	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1); /* ifr is zeroed: stays NUL-terminated */
+	if (ioctl(sock, SIOCGIFINDEX, &ifr) < 0) {
+		printf("ioctl: %s\n", strerror(errno));
+		goto err;
+	}
+
+	memset(&sll, 0, sizeof(sll));
+	sll.sll_family = AF_PACKET;
+	sll.sll_ifindex = ifr.ifr_ifindex;
+	sll.sll_protocol = htons(ETH_P_ALL);
+	if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
+		printf("bind: %s\n", strerror(errno));
+		goto err;
+	}
+
+	memset(&mr, 0, sizeof(mr));
+	mr.mr_ifindex = ifr.ifr_ifindex;
+	mr.mr_type = PACKET_MR_PROMISC;
+	if (setsockopt(sock, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mr, sizeof(mr)) >= 0)
+		return sock;
+	printf("set_promisc: %s\n", strerror(errno));
+err:
+	close(sock);
+	return -1;
+}
+
+#define MAP_ID 1
+
+/*
+ * Count TCP and UDP packets seen on eth0 with an eBPF socket filter, printing
+ * both counters once per second for ten iterations.  Returns 0 on success or
+ * -1 if any step failed; the cleanup section runs on every path.
+ */
+static int test_sock(void)
+{
+	static struct sock_filter_int prog[] = {
+		BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+		BPF_LD_ABS(BPF_B, 14 + 9 /* R0 = ip->proto */),
+		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+		BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+		BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, MAP_ID), /* r1 = MAP_ID */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+		BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), /* r1 = 1 */
+		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+		BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), /* r0 = 0 */
+		BPF_EXIT_INSN(),
+	};
+	int tcp_key = 6, udp_key = 17; /* IP protocol numbers, used as map keys */
+	int sock = -1, prog_id = 1, ret = -1, i;
+	long long value = 0, tcp_cnt, udp_cnt;
+
+	if (bpf_create_map(MAP_ID, sizeof(tcp_key), sizeof(value), 2) < 0) {
+		printf("failed to create map '%s'\n", strerror(errno));
+		/* must have been left from previous aborted run, delete it */
+		goto cleanup;
+	}
+
+	if (bpf_update_elem(MAP_ID, &tcp_key, &value) < 0) {
+		printf("update err key=%d\n", tcp_key);
+		goto cleanup;
+	}
+
+	if (bpf_update_elem(MAP_ID, &udp_key, &value) < 0) {
+		printf("update err key=%d\n", udp_key);
+		goto cleanup;
+	}
+
+	prog_id = bpf_prog_load(prog_id, BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), "GPL");
+	if (prog_id < 0) {
+		printf("failed to load prog '%s'\n", strerror(errno));
+		goto cleanup;
+	}
+
+	sock = open_raw_sock("eth0");
+	if (sock < 0)
+		goto cleanup; /* open_raw_sock() printed why; never attach to a bad fd */
+
+	if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER_EBPF, &prog_id, sizeof(prog_id)) < 0) {
+		printf("setsockopt %d\n", errno);
+		goto cleanup;
+	}
+
+	for (i = 0; i < 10; i++) {
+		if (bpf_lookup_elem(MAP_ID, &tcp_key, &tcp_cnt) < 0 ||
+		    bpf_lookup_elem(MAP_ID, &udp_key, &udp_cnt) < 0) {
+			printf("lookup err\n");
+			goto cleanup;
+		}
+		printf("TCP %lld UDP %lld packets\n", tcp_cnt, udp_cnt);
+		sleep(1);
+	}
+	ret = 0;
+
+cleanup:
+	if (sock >= 0)
+		close(sock);
+	bpf_prog_unload(prog_id);
+	bpf_delete_map(MAP_ID);
+	return ret;
+}
+
+int main(void)
+{
+	/* Report the sample's result to the shell instead of always exiting 0. */
+	return test_sock() ? EXIT_FAILURE : EXIT_SUCCESS;
+}