Message ID | 20200227032013.12385-9-dsahern@kernel.org |
---|---|
State | RFC |
Delegated to: | BPF Maintainers |
Headers | show |
Series | Add support for XDP in egress path | expand |
On 2020/2/27 上午11:20, David Ahern wrote: > From: David Ahern <dahern@digitalocean.com> > > Add support to run Tx path program on packets arriving at a tun > device as an skb. > > XDP_TX return code means move the packet to the Tx path of the device. > For a program run in the Tx / egress path, XDP_TX is essentially the > same as "continue on" which is XDP_PASS. > > Conceptually, XDP_REDIRECT for this path can work the same as it > does for the Rx path, but that return code is left for a follow > on series. > > Signed-off-by: Prashant Bhole <prashantbhole.linux@gmail.com> > Signed-off-by: David Ahern <dahern@digitalocean.com> > --- > drivers/net/tun.c | 69 ++++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 66 insertions(+), 3 deletions(-) > > diff --git a/drivers/net/tun.c b/drivers/net/tun.c > index 6aae398b904b..dcae6521a39d 100644 > --- a/drivers/net/tun.c > +++ b/drivers/net/tun.c > @@ -1059,6 +1059,63 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun, > return len; > } > > +static struct sk_buff *tun_prepare_xdp_skb(struct sk_buff *skb) > +{ > + if (skb_shared(skb) || skb_cloned(skb)) { > + struct sk_buff *nskb; > + > + nskb = skb_copy(skb, GFP_ATOMIC); > + consume_skb(skb); > + return nskb; > + } > + > + return skb; > +} > + > +static u32 tun_do_xdp_tx_generic(struct tun_struct *tun, > + struct net_device *dev, > + struct sk_buff *skb) > +{ > + struct bpf_prog *xdp_prog; > + u32 act = XDP_PASS; > + > + xdp_prog = rcu_dereference(tun->xdp_egress_prog); > + if (xdp_prog) { > + struct xdp_txq_info txq = { .dev = dev }; > + struct xdp_buff xdp; > + > + skb = tun_prepare_xdp_skb(skb); > + if (!skb) { > + act = XDP_DROP; > + goto out; > + } > + > + xdp.txq = &txq; > + > + act = do_xdp_generic_core(skb, &xdp, xdp_prog); > + switch (act) { > + case XDP_TX: /* for Tx path, XDP_TX == XDP_PASS */ > + act = XDP_PASS; > + break; Jute a note here, I agree for TX XDP it may be better to do this. But for offloaded program we need different semantic. Or we can deal this with attach types? Thanks > + case XDP_PASS: > + break; > + case XDP_REDIRECT: > + /* fall through */ > + default: > + bpf_warn_invalid_xdp_action(act); > + /* fall through */ > + case XDP_ABORTED: > + trace_xdp_exception(tun->dev, xdp_prog, act); > + /* fall through */ > + case XDP_DROP: > + break; > + } > + } > + > +out: > + return act; > +} > + > /* Net device start xmit */ > static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) > { > @@ -1066,6 +1123,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) > int txq = skb->queue_mapping; > struct tun_file *tfile; > int len = skb->len; > + u32 act; > > rcu_read_lock(); > tfile = rcu_dereference(tun->tfiles[txq]); > @@ -1107,9 +1165,13 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) > > nf_reset_ct(skb); > > - if (ptr_ring_produce(&tfile->tx_ring, skb)) > + act = tun_do_xdp_tx_generic(tun, dev, skb); > + if (act != XDP_PASS) > goto drop; > > + if (ptr_ring_produce(&tfile->tx_ring, skb)) > + goto err_out; > + > /* Notify and wake up reader process */ > if (tfile->flags & TUN_FASYNC) > kill_fasync(&tfile->fasync, SIGIO, POLL_IN); > @@ -1118,10 +1180,11 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) > rcu_read_unlock(); > return NETDEV_TX_OK; > > -drop: > - this_cpu_inc(tun->pcpu_stats->tx_dropped); > +err_out: > skb_tx_error(skb); > kfree_skb(skb); > +drop: > + this_cpu_inc(tun->pcpu_stats->tx_dropped); > rcu_read_unlock(); > return NET_XMIT_DROP; > }
On 3/1/20 8:28 PM, Jason Wang wrote: >> +static u32 tun_do_xdp_tx_generic(struct tun_struct *tun, >> + struct net_device *dev, >> + struct sk_buff *skb) >> +{ >> + struct bpf_prog *xdp_prog; >> + u32 act = XDP_PASS; >> + >> + xdp_prog = rcu_dereference(tun->xdp_egress_prog); >> + if (xdp_prog) { >> + struct xdp_txq_info txq = { .dev = dev }; >> + struct xdp_buff xdp; >> + >> + skb = tun_prepare_xdp_skb(skb); >> + if (!skb) { >> + act = XDP_DROP; >> + goto out; >> + } >> + >> + xdp.txq = &txq; >> + >> + act = do_xdp_generic_core(skb, &xdp, xdp_prog); >> + switch (act) { >> + case XDP_TX: /* for Tx path, XDP_TX == XDP_PASS */ >> + act = XDP_PASS; >> + break; > > > Jute a note here, I agree for TX XDP it may be better to do this. > > But for offloaded program we need different semantic. Or we can deal > this with attach types? > This path is for XDP_FLAGS_DRV_MODE. Offloaded programs (XDP_FLAGS_HW_MODE) will be run in vhost context.
On Wed, 26 Feb 2020 20:20:10 -0700 David Ahern <dsahern@kernel.org> wrote: > +static u32 tun_do_xdp_tx_generic(struct tun_struct *tun, > + struct net_device *dev, > + struct sk_buff *skb) > +{ > + struct bpf_prog *xdp_prog; > + u32 act = XDP_PASS; > + > + xdp_prog = rcu_dereference(tun->xdp_egress_prog); > + if (xdp_prog) { > + struct xdp_txq_info txq = { .dev = dev }; > + struct xdp_buff xdp; > + > + skb = tun_prepare_xdp_skb(skb); > + if (!skb) { > + act = XDP_DROP; > + goto out; > + } > + > + xdp.txq = &txq; > + > + act = do_xdp_generic_core(skb, &xdp, xdp_prog); > + switch (act) { > + case XDP_TX: /* for Tx path, XDP_TX == XDP_PASS */ > + act = XDP_PASS; > + break; > + case XDP_PASS: > + break; > + case XDP_REDIRECT: > + /* fall through */ > + default: > + bpf_warn_invalid_xdp_action(act); > + /* fall through */ > + case XDP_ABORTED: > + trace_xdp_exception(tun->dev, xdp_prog, act); Hmm, don't we need to extend the trace_xdp_exception() to give users a hint that this happened on the TX/egress path? > + /* fall through */ > + case XDP_DROP: > + break; > + } > + } > + > +out: > + return act; > +}
On 3/3/20 3:46 AM, Jesper Dangaard Brouer wrote: > On Wed, 26 Feb 2020 20:20:10 -0700 > David Ahern <dsahern@kernel.org> wrote: > >> +static u32 tun_do_xdp_tx_generic(struct tun_struct *tun, >> + struct net_device *dev, >> + struct sk_buff *skb) >> +{ >> + struct bpf_prog *xdp_prog; >> + u32 act = XDP_PASS; >> + >> + xdp_prog = rcu_dereference(tun->xdp_egress_prog); >> + if (xdp_prog) { >> + struct xdp_txq_info txq = { .dev = dev }; >> + struct xdp_buff xdp; >> + >> + skb = tun_prepare_xdp_skb(skb); >> + if (!skb) { >> + act = XDP_DROP; >> + goto out; >> + } >> + >> + xdp.txq = &txq; >> + >> + act = do_xdp_generic_core(skb, &xdp, xdp_prog); >> + switch (act) { >> + case XDP_TX: /* for Tx path, XDP_TX == XDP_PASS */ >> + act = XDP_PASS; >> + break; >> + case XDP_PASS: >> + break; >> + case XDP_REDIRECT: >> + /* fall through */ >> + default: >> + bpf_warn_invalid_xdp_action(act); >> + /* fall through */ >> + case XDP_ABORTED: >> + trace_xdp_exception(tun->dev, xdp_prog, act); > > Hmm, don't we need to extend the trace_xdp_exception() to give users a > hint that this happened on the TX/egress path? tracepoint has the program id, unsupported action and device. Seems like the program id is sufficient. I do need to update libbpf to account for the attach type.
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 6aae398b904b..dcae6521a39d 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1059,6 +1059,63 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun, return len; } +static struct sk_buff *tun_prepare_xdp_skb(struct sk_buff *skb) +{ + if (skb_shared(skb) || skb_cloned(skb)) { + struct sk_buff *nskb; + + nskb = skb_copy(skb, GFP_ATOMIC); + consume_skb(skb); + return nskb; + } + + return skb; +} + +static u32 tun_do_xdp_tx_generic(struct tun_struct *tun, + struct net_device *dev, + struct sk_buff *skb) +{ + struct bpf_prog *xdp_prog; + u32 act = XDP_PASS; + + xdp_prog = rcu_dereference(tun->xdp_egress_prog); + if (xdp_prog) { + struct xdp_txq_info txq = { .dev = dev }; + struct xdp_buff xdp; + + skb = tun_prepare_xdp_skb(skb); + if (!skb) { + act = XDP_DROP; + goto out; + } + + xdp.txq = &txq; + + act = do_xdp_generic_core(skb, &xdp, xdp_prog); + switch (act) { + case XDP_TX: /* for Tx path, XDP_TX == XDP_PASS */ + act = XDP_PASS; + break; + case XDP_PASS: + break; + case XDP_REDIRECT: + /* fall through */ + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(tun->dev, xdp_prog, act); + /* fall through */ + case XDP_DROP: + break; + } + } + +out: + return act; +} + /* Net device start xmit */ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -1066,6 +1123,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) int txq = skb->queue_mapping; struct tun_file *tfile; int len = skb->len; + u32 act; rcu_read_lock(); tfile = rcu_dereference(tun->tfiles[txq]); @@ -1107,9 +1165,13 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) nf_reset_ct(skb); - if (ptr_ring_produce(&tfile->tx_ring, skb)) + act = tun_do_xdp_tx_generic(tun, dev, skb); + if (act != XDP_PASS) goto drop; + if (ptr_ring_produce(&tfile->tx_ring, skb)) + goto err_out; + /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) kill_fasync(&tfile->fasync, SIGIO, POLL_IN); @@ -1118,10 +1180,11 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) rcu_read_unlock(); return NETDEV_TX_OK; -drop: - this_cpu_inc(tun->pcpu_stats->tx_dropped); +err_out: skb_tx_error(skb); kfree_skb(skb); +drop: + this_cpu_inc(tun->pcpu_stats->tx_dropped); rcu_read_unlock(); return NET_XMIT_DROP; }