diff mbox series

[RFC,bpf-next,4/7] net: flow_dissector: handle no-skb use case

Message ID 20190205173629.160717-5-sdf@google.com
State RFC
Delegated to: BPF Maintainers
Headers show
Series net: flow_dissector: trigger BPF hook when called from eth_get_headlen | expand

Commit Message

Stanislav Fomichev Feb. 5, 2019, 5:36 p.m. UTC
When flow_dissector is called without an skb (with only data and hlen),
construct an on-stack skb (which has a linear chunk of data passed
to the flow dissector). This should let us handle the eth_get_headlen
case where only data is provided and we don't want to (yet) allocate
an skb.

Since this on-stack skb doesn't allocate its own data, we can't
add shinfo and need to be careful to avoid any code paths that use
it. Flow dissector BPF programs can only call the bpf_skb_load_bytes helper,
which doesn't touch shinfo in our case (skb->len is the length of the
linear header so it exits early).

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/skbuff.h    |  5 +++
 net/core/flow_dissector.c | 95 +++++++++++++++++++++++++++++----------
 2 files changed, 76 insertions(+), 24 deletions(-)

Comments

Willem de Bruijn Feb. 5, 2019, 8:19 p.m. UTC | #1
On Tue, Feb 5, 2019 at 12:57 PM Stanislav Fomichev <sdf@google.com> wrote:
>
> When flow_dissector is called without skb (with only data and hlen),
> construct on-stack skb (which has a linear chunk of data passed
> to the flow dissector). This should let us handle eth_get_headlen
> case where only data is provided and we don't want to (yet) allocate
> an skb.
>
> Since this on-stack skb doesn't allocate its own data, we can't
> add shinfo and need to be careful to avoid any code paths that use
> it. Flow dissector BPF programs can only call bpf_skb_load_bytes helper,
> which doesn't touch shinfo in our case (skb->len is the length of the
> linear header so it exits early).
>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  include/linux/skbuff.h    |  5 +++
>  net/core/flow_dissector.c | 95 +++++++++++++++++++++++++++++----------
>  2 files changed, 76 insertions(+), 24 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index aa9a9983de80..5f1c085cb34c 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -1227,6 +1227,11 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
>                             const struct sk_buff *skb,
>                             struct flow_dissector *flow_dissector,
>                             struct bpf_flow_keys *flow_keys);
> +bool __flow_bpf_dissect(struct bpf_prog *prog,
> +                       void *data, __be16 proto,
> +                       int nhoff, int hlen,
> +                       struct flow_dissector *flow_dissector,
> +                       struct bpf_flow_keys *flow_keys);

nit: please use more descriptive name. Perhaps bpf_flow_dissect_raw
and rename __skb_flow_bpf_dissect to bpf_flow_dissect_skb.

> +bool __flow_bpf_dissect(struct bpf_prog *prog,
> +                       void *data, __be16 proto,
> +                       int nhoff, int hlen,
> +                       struct flow_dissector *flow_dissector,
> +                       struct bpf_flow_keys *flow_keys)
> +{
> +       struct bpf_skb_data_end *cb;
> +       struct sk_buff skb;
> +       u32 result;
> +
> +       __init_skb(&skb, data, hlen);
> +       skb_put(&skb, hlen);
> +       skb.protocol = proto;
> +
> +       init_flow_keys(flow_keys, &skb, nhoff);
> +
> +       cb = (struct bpf_skb_data_end *)skb.cb;
> +       cb->data_meta = skb.data;
> +       cb->data_end  = skb.data + skb_headlen(&skb);
> +
> +       result = BPF_PROG_RUN(prog, &skb);
> +
> +       clamp_flow_keys(flow_keys, hlen);
>
>         return result == BPF_OK;
>  }

Can __flow_bpf_dissect just construct an skb and then call
__skb_flow_bpf_dissect?

It will unnecessarily save and restore the control block, but that is
a relatively small cost (compared to, say, zeroing the entire skb).
Stanislav Fomichev Feb. 5, 2019, 8:45 p.m. UTC | #2
On 02/05, Willem de Bruijn wrote:
> On Tue, Feb 5, 2019 at 12:57 PM Stanislav Fomichev <sdf@google.com> wrote:
> >
> > When flow_dissector is called without skb (with only data and hlen),
> > construct on-stack skb (which has a linear chunk of data passed
> > to the flow dissector). This should let us handle eth_get_headlen
> > case where only data is provided and we don't want to (yet) allocate
> > an skb.
> >
> > Since this on-stack skb doesn't allocate its own data, we can't
> > add shinfo and need to be careful to avoid any code paths that use
> > it. Flow dissector BPF programs can only call bpf_skb_load_bytes helper,
> > which doesn't touch shinfo in our case (skb->len is the length of the
> > linear header so it exits early).
> >
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  include/linux/skbuff.h    |  5 +++
> >  net/core/flow_dissector.c | 95 +++++++++++++++++++++++++++++----------
> >  2 files changed, 76 insertions(+), 24 deletions(-)
> >
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index aa9a9983de80..5f1c085cb34c 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -1227,6 +1227,11 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
> >                             const struct sk_buff *skb,
> >                             struct flow_dissector *flow_dissector,
> >                             struct bpf_flow_keys *flow_keys);
> > +bool __flow_bpf_dissect(struct bpf_prog *prog,
> > +                       void *data, __be16 proto,
> > +                       int nhoff, int hlen,
> > +                       struct flow_dissector *flow_dissector,
> > +                       struct bpf_flow_keys *flow_keys);
> 
> nit: please use more descriptive name. Perhaps bpf_flow_dissect_raw
> and rename __skb_flow_bpf_dissect to bpf_flow_dissect_skb.
Agreed.

> > +bool __flow_bpf_dissect(struct bpf_prog *prog,
> > +                       void *data, __be16 proto,
> > +                       int nhoff, int hlen,
> > +                       struct flow_dissector *flow_dissector,
> > +                       struct bpf_flow_keys *flow_keys)
> > +{
> > +       struct bpf_skb_data_end *cb;
> > +       struct sk_buff skb;
> > +       u32 result;
> > +
> > +       __init_skb(&skb, data, hlen);
> > +       skb_put(&skb, hlen);
> > +       skb.protocol = proto;
> > +
> > +       init_flow_keys(flow_keys, &skb, nhoff);
> > +
> > +       cb = (struct bpf_skb_data_end *)skb.cb;
> > +       cb->data_meta = skb.data;
> > +       cb->data_end  = skb.data + skb_headlen(&skb);
> > +
> > +       result = BPF_PROG_RUN(prog, &skb);
> > +
> > +       clamp_flow_keys(flow_keys, hlen);
> >
> >         return result == BPF_OK;
> >  }
> 
> Can __flow_bpf_dissect just construct an skb and then call
> __skb_flow_bpf_dissect?
__skb_flow_bpf_dissect calls bpf_compute_data_pointers which calls
skb_metadata_len which touches shinfo. And I don't think I have a
clever way to handle that.

> 
> It will unnecessarily save and restore the control block, but that is
> a relatively small cost (compared to, say, zeroing the entire skb).
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index aa9a9983de80..5f1c085cb34c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1227,6 +1227,11 @@  bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 			    const struct sk_buff *skb,
 			    struct flow_dissector *flow_dissector,
 			    struct bpf_flow_keys *flow_keys);
+bool __flow_bpf_dissect(struct bpf_prog *prog,
+			void *data, __be16 proto,
+			int nhoff, int hlen,
+			struct flow_dissector *flow_dissector,
+			struct bpf_flow_keys *flow_keys);
 bool __skb_flow_dissect(struct net *net,
 			const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index dddcc37c0462..87167b74f59a 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -683,6 +683,28 @@  static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
 	}
 }
 
+static inline void init_flow_keys(struct bpf_flow_keys *flow_keys,
+				  struct sk_buff *skb, int nhoff)
+{
+	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+	memset(cb, 0, sizeof(*cb));
+	memset(flow_keys, 0, sizeof(*flow_keys));
+
+	flow_keys->nhoff = nhoff;
+	flow_keys->thoff = nhoff;
+
+	cb->qdisc_cb.flow_keys = flow_keys;
+}
+
+static inline void clamp_flow_keys(struct bpf_flow_keys *flow_keys,
+				   int hlen)
+{
+	flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, hlen);
+	flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
+				   flow_keys->nhoff, hlen);
+}
+
 bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 			    const struct sk_buff *skb,
 			    struct flow_dissector *flow_dissector,
@@ -702,13 +724,9 @@  bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 
 	/* Save Control Block */
 	memcpy(&cb_saved, cb, sizeof(cb_saved));
-	memset(cb, 0, sizeof(*cb));
 
 	/* Pass parameters to the BPF program */
-	memset(flow_keys, 0, sizeof(*flow_keys));
-	cb->qdisc_cb.flow_keys = flow_keys;
-	flow_keys->nhoff = skb_network_offset(skb);
-	flow_keys->thoff = flow_keys->nhoff;
+	init_flow_keys(flow_keys, skb, skb_network_offset(skb));
 
 	bpf_compute_data_pointers((struct sk_buff *)skb);
 	result = BPF_PROG_RUN(prog, skb);
@@ -716,9 +734,34 @@  bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 	/* Restore state */
 	memcpy(cb, &cb_saved, sizeof(cb_saved));
 
-	flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len);
-	flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
-				   flow_keys->nhoff, skb->len);
+	clamp_flow_keys(flow_keys, skb->len);
+
+	return result == BPF_OK;
+}
+
+bool __flow_bpf_dissect(struct bpf_prog *prog,
+			void *data, __be16 proto,
+			int nhoff, int hlen,
+			struct flow_dissector *flow_dissector,
+			struct bpf_flow_keys *flow_keys)
+{
+	struct bpf_skb_data_end *cb;
+	struct sk_buff skb;
+	u32 result;
+
+	__init_skb(&skb, data, hlen);
+	skb_put(&skb, hlen);
+	skb.protocol = proto;
+
+	init_flow_keys(flow_keys, &skb, nhoff);
+
+	cb = (struct bpf_skb_data_end *)skb.cb;
+	cb->data_meta = skb.data;
+	cb->data_end  = skb.data + skb_headlen(&skb);
+
+	result = BPF_PROG_RUN(prog, &skb);
+
+	clamp_flow_keys(flow_keys, hlen);
 
 	return result == BPF_OK;
 }
@@ -754,8 +797,10 @@  bool __skb_flow_dissect(struct net *net,
 	struct flow_dissector_key_icmp *key_icmp;
 	struct flow_dissector_key_tags *key_tags;
 	struct flow_dissector_key_vlan *key_vlan;
-	enum flow_dissect_ret fdret;
 	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
+	struct bpf_prog *attached = NULL;
+	struct bpf_flow_keys flow_keys;
+	enum flow_dissect_ret fdret;
 	int num_hdrs = 0;
 	u8 ip_proto = 0;
 	bool ret;
@@ -795,30 +840,32 @@  bool __skb_flow_dissect(struct net *net,
 					      FLOW_DISSECTOR_KEY_BASIC,
 					      target_container);
 
-	if (skb) {
-		struct bpf_flow_keys flow_keys;
-		struct bpf_prog *attached = NULL;
+	rcu_read_lock();
 
-		rcu_read_lock();
+	if (!net && skb)
+		net = skb_net(skb);
+	if (net)
+		attached = rcu_dereference(net->flow_dissector_prog);
 
-		if (!net && skb)
-			net = skb_net(skb);
-		if (net)
-			attached = rcu_dereference(net->flow_dissector_prog);
-		WARN_ON_ONCE(!net);
+	WARN_ON_ONCE(!net);
 
-		if (attached) {
+	if (attached) {
+		if (skb)
 			ret = __skb_flow_bpf_dissect(attached, skb,
 						     flow_dissector,
 						     &flow_keys);
-			__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
-						 target_container);
-			rcu_read_unlock();
-			return ret;
-		}
+		else
+			ret = __flow_bpf_dissect(attached, data, proto, nhoff,
+						 hlen, flow_dissector,
+						 &flow_keys);
+		__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
+					 target_container);
 		rcu_read_unlock();
+		return ret;
 	}
 
+	rcu_read_unlock();
+
 	if (dissector_uses_key(flow_dissector,
 			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
 		struct ethhdr *eth = eth_hdr(skb);