diff mbox series

[RFC,1/3] ebpf: add next_skb_frag bpf helper for sk filter

Message ID 1528491607-10399-2-git-send-email-tushar.n.dave@oracle.com
State RFC, archived
Delegated to: BPF Maintainers
Headers show
Series BPF socket filter to deal with skb frags | expand

Commit Message

Tushar Dave June 8, 2018, 9 p.m. UTC
Today the socket filter only deals with linear skbs. This change allows
eBPF programs to look into non-linear skbs, e.g. skb frags. This will be
useful when users need to look into data which is not contained in the
linear part of the skb.

Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
Reviewed-by: Shannon Nelson <shannon.nelson@oracle.com>
Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
 include/linux/filter.h                    |  2 ++
 include/uapi/linux/bpf.h                  | 10 ++++++-
 net/core/filter.c                         | 44 +++++++++++++++++++++++++++++--
 tools/include/uapi/linux/bpf.h            | 10 ++++++-
 tools/testing/selftests/bpf/bpf_helpers.h |  2 ++
 5 files changed, 64 insertions(+), 4 deletions(-)

Comments

Daniel Borkmann June 8, 2018, 9:27 p.m. UTC | #1
On 06/08/2018 11:00 PM, Tushar Dave wrote:
> Today socket filter only deals with linear skbs. This change allows
> ebpf programs to look into non-linear skb e.g. skb frags. This will be
> useful when users need to look into data which is not contained in the
> linear part of skb.

Hmm, I don't think this statement is correct in its form here ... they
can handle non-linear skbs just fine.

The straightforward way is to use bpf_skb_load_bytes(). It's simple and
internally uses skb_header_pointer(), which of course walks everything
if it really has to via skb_copy_bits() (page frags _and_ frag list). And
if you need to look into mac/net headers that may otherwise not be accessible
anymore from the socket layer, there's the bpf_skb_load_bytes_relative() helper,
which effectively does the negative offset trick from ld_abs/ind, but more
efficiently for multi-byte loads.

Thanks,
Daniel
Tushar Dave June 8, 2018, 9:46 p.m. UTC | #2
On 06/08/2018 02:27 PM, Daniel Borkmann wrote:
> On 06/08/2018 11:00 PM, Tushar Dave wrote:
>> Today socket filter only deals with linear skbs. This change allows
>> ebpf programs to look into non-linear skb e.g. skb frags. This will be
>> useful when users need to look into data which is not contained in the
>> linear part of skb.
> 
> Hmm, I don't think this statement is correct in its form here ... they
> can handle non-linear skbs just fine.
Thanks Daniel for your reply.
> 
> Straight forward way is to use bpf_skb_load_bytes(). It's simple and uses
> internally skb_header_pointer(), and that one of course walks everything
> if it really has to via skb_copy_bits() (page frags _and_ frag list). And
> if you need to look into mac/net headers that may otherwise not be accessible
> anymore from socket layer, there's bpf_skb_load_bytes_relative() helper
> which is effectively doing the negative offset trick from ld_abs/ind more
> efficient for multi-byte loads.
I'm looking into bpf_skb_load_bytes and friends.

Thanks.
-Tushar
> 
> Thanks,
> Daniel
>
Tushar Dave June 8, 2018, 10:24 p.m. UTC | #3
On 06/08/2018 02:46 PM, Tushar Dave wrote:
> 
> 
> On 06/08/2018 02:27 PM, Daniel Borkmann wrote:
>> On 06/08/2018 11:00 PM, Tushar Dave wrote:
>>> Today socket filter only deals with linear skbs. This change allows
>>> ebpf programs to look into non-linear skb e.g. skb frags. This will be
>>> useful when users need to look into data which is not contained in the
>>> linear part of skb.
>>
>> Hmm, I don't think this statement is correct in its form here ... they
>> can handle non-linear skbs just fine.
> Thanks Daniel for your reply.
>>
>> Straight forward way is to use bpf_skb_load_bytes(). It's simple and uses
>> internally skb_header_pointer(), and that one of course walks everything
>> if it really has to via skb_copy_bits() (page frags _and_ frag list). And
>> if you need to look into mac/net headers that may otherwise not be 
>> accessible
>> anymore from socket layer, there's bpf_skb_load_bytes_relative() helper
>> which is effectively doing the negative offset trick from ld_abs/ind more
>> efficient for multi-byte loads.
> I'm looking into bpf_skb_load_bytes and friends.

Daniel,

While I am trying to see if I can use the existing bpf_skb_load helpers, I am
wondering whether socket filter based eBPF programs are allowed to change packet
data. In other words, can we use them to build a firewall?

Thanks.

-Tushar
> 
> Thanks.
> -Tushar
>>
>> Thanks,
>> Daniel
>>
>
diff mbox series

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9dbcb9d..603b8bf 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -500,6 +500,7 @@  struct sk_filter {
 
 struct bpf_skb_data_end {
 	struct qdisc_skb_cb qdisc_cb;
+	u8 index;
 	void *data_meta;
 	void *data_end;
 };
@@ -534,6 +535,7 @@  static inline void bpf_compute_data_pointers(struct sk_buff *skb)
 	BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb));
 	cb->data_meta = skb->data - skb_metadata_len(skb);
 	cb->data_end  = skb->data + skb_headlen(skb);
+	cb->index = 0;
 }
 
 static inline u8 *bpf_skb_cb(struct sk_buff *skb)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d94d333..5fe9668 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1902,6 +1902,13 @@  struct bpf_stack_build_id {
  *		egress otherwise). This is the only flag supported for now.
  *	Return
  *		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_next_skb_frag(struct sk_buff *skb)
+ *	Description
+ *		This helper allows users to look into non-linear part of skb
+ *		e.g. skb frags.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -1976,7 +1983,8 @@  struct bpf_stack_build_id {
 	FN(fib_lookup),			\
 	FN(sock_hash_update),		\
 	FN(msg_redirect_hash),		\
-	FN(sk_redirect_hash),
+	FN(sk_redirect_hash),		\
+	FN(next_skb_frag),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 51ea7dd..fd8e90f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3752,6 +3752,38 @@  static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
+BPF_CALL_1(bpf_next_skb_frag, struct sk_buff *, skb)
+{
+	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+	const skb_frag_t *frag;
+
+	if (skb->data_len == 0)
+		return -ENODATA;
+
+	if (cb->index == (u8)skb_shinfo(skb)->nr_frags)
+		return -ENODATA;
+
+	/* Store the start address of frag number cb->index in data_meta and
+	 * its end address (start + frag size) in data_end, so that the eBPF
+	 * program can look into that skb frag. */
+	frag = &skb_shinfo(skb)->frags[cb->index];
+	cb->data_meta = page_address(skb_frag_page(frag)) +
+			frag->page_offset;
+	cb->data_end = cb->data_meta + skb_frag_size(frag);
+
+	/* advance the frag index so the next call exposes the following frag */
+	cb->index++;
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_next_skb_frag_proto = {
+	.func		= bpf_next_skb_frag,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
 BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 	   int, level, int, optname, char *, optval, int, optlen)
 {
@@ -4415,6 +4447,8 @@  static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 		return &bpf_get_socket_cookie_proto;
 	case BPF_FUNC_get_socket_uid:
 		return &bpf_get_socket_uid_proto;
+	case BPF_FUNC_next_skb_frag:
+		return &bpf_next_skb_frag_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -4698,10 +4732,16 @@  static bool sk_filter_is_valid_access(int off, int size,
 				      struct bpf_insn_access_aux *info)
 {
 	switch (off) {
-	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data):
-	case bpf_ctx_range(struct __sk_buff, data_meta):
+		info->reg_type = PTR_TO_PACKET;
+		break;
 	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_meta):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 		return false;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d94d333..5fe9668 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1902,6 +1902,13 @@  struct bpf_stack_build_id {
  *		egress otherwise). This is the only flag supported for now.
  *	Return
  *		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_next_skb_frag(struct sk_buff *skb)
+ *	Description
+ *		This helper allows users to look into non-linear part of skb
+ *		e.g. skb frags.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -1976,7 +1983,8 @@  struct bpf_stack_build_id {
 	FN(fib_lookup),			\
 	FN(sock_hash_update),		\
 	FN(msg_redirect_hash),		\
-	FN(sk_redirect_hash),
+	FN(sk_redirect_hash),		\
+	FN(next_skb_frag),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 8f143df..51f2153 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -114,6 +114,8 @@  static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
 static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
 			     int plen, __u32 flags) =
 	(void *) BPF_FUNC_fib_lookup;
+static unsigned long long (*bpf_next_skb_frag)(void *ctx) =
+	(void *) BPF_FUNC_next_skb_frag;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions