diff mbox series

[RFC,v4,bpf-next,08/11] tun: Support xdp in the Tx path for skb

Message ID 20200227032013.12385-9-dsahern@kernel.org
State RFC
Delegated to: BPF Maintainers
Headers show
Series Add support for XDP in egress path | expand

Commit Message

David Ahern Feb. 27, 2020, 3:20 a.m. UTC
From: David Ahern <dahern@digitalocean.com>

Add support to run Tx path program on packets arriving at a tun
device as an skb.

XDP_TX return code means move the packet to the Tx path of the device.
For a program run in the Tx / egress path, XDP_TX is essentially the
same as "continue on" which is XDP_PASS.

Conceptually, XDP_REDIRECT for this path can work the same as it
does for the Rx path, but that return code is left for a follow
on series.

Signed-off-by: Prashant Bhole <prashantbhole.linux@gmail.com>
Signed-off-by: David Ahern <dahern@digitalocean.com>
---
 drivers/net/tun.c | 69 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 66 insertions(+), 3 deletions(-)

Comments

Jason Wang March 2, 2020, 3:28 a.m. UTC | #1
On 2020/2/27 上午11:20, David Ahern wrote:
> From: David Ahern <dahern@digitalocean.com>
>
> Add support to run Tx path program on packets arriving at a tun
> device as an skb.
>
> XDP_TX return code means move the packet to the Tx path of the device.
> For a program run in the Tx / egress path, XDP_TX is essentially the
> same as "continue on" which is XDP_PASS.
>
> Conceptually, XDP_REDIRECT for this path can work the same as it
> does for the Rx path, but that return code is left for a follow
> on series.
>
> Signed-off-by: Prashant Bhole <prashantbhole.linux@gmail.com>
> Signed-off-by: David Ahern <dahern@digitalocean.com>
> ---
>   drivers/net/tun.c | 69 ++++++++++++++++++++++++++++++++++++++++++++---
>   1 file changed, 66 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 6aae398b904b..dcae6521a39d 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1059,6 +1059,63 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun,
>   	return len;
>   }
>   
> +static struct sk_buff *tun_prepare_xdp_skb(struct sk_buff *skb)
> +{
> +	if (skb_shared(skb) || skb_cloned(skb)) {
> +		struct sk_buff *nskb;
> +
> +		nskb = skb_copy(skb, GFP_ATOMIC);
> +		consume_skb(skb);
> +		return nskb;
> +	}
> +
> +	return skb;
> +}
> +
> +static u32 tun_do_xdp_tx_generic(struct tun_struct *tun,
> +				 struct net_device *dev,
> +				 struct sk_buff *skb)
> +{
> +	struct bpf_prog *xdp_prog;
> +	u32 act = XDP_PASS;
> +
> +	xdp_prog = rcu_dereference(tun->xdp_egress_prog);
> +	if (xdp_prog) {
> +		struct xdp_txq_info txq = { .dev = dev };
> +		struct xdp_buff xdp;
> +
> +		skb = tun_prepare_xdp_skb(skb);
> +		if (!skb) {
> +			act = XDP_DROP;
> +			goto out;
> +		}
> +
> +		xdp.txq = &txq;
> +
> +		act = do_xdp_generic_core(skb, &xdp, xdp_prog);
> +		switch (act) {
> +		case XDP_TX:    /* for Tx path, XDP_TX == XDP_PASS */
> +			act = XDP_PASS;
> +			break;


Jute a note here, I agree for TX XDP it may be better to do this.

But for offloaded program we need different semantic. Or we can deal 
this with attach types?

Thanks


> +		case XDP_PASS:
> +			break;
> +		case XDP_REDIRECT:
> +			/* fall through */
> +		default:
> +			bpf_warn_invalid_xdp_action(act);
> +			/* fall through */
> +		case XDP_ABORTED:
> +			trace_xdp_exception(tun->dev, xdp_prog, act);
> +			/* fall through */
> +		case XDP_DROP:
> +			break;
> +		}
> +	}
> +
> +out:
> +	return act;
> +}
> +
>   /* Net device start xmit */
>   static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>   {
> @@ -1066,6 +1123,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>   	int txq = skb->queue_mapping;
>   	struct tun_file *tfile;
>   	int len = skb->len;
> +	u32 act;
>   
>   	rcu_read_lock();
>   	tfile = rcu_dereference(tun->tfiles[txq]);
> @@ -1107,9 +1165,13 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>   
>   	nf_reset_ct(skb);
>   
> -	if (ptr_ring_produce(&tfile->tx_ring, skb))
> +	act = tun_do_xdp_tx_generic(tun, dev, skb);
> +	if (act != XDP_PASS)
>   		goto drop;
>   
> +	if (ptr_ring_produce(&tfile->tx_ring, skb))
> +		goto err_out;
> +
>   	/* Notify and wake up reader process */
>   	if (tfile->flags & TUN_FASYNC)
>   		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
> @@ -1118,10 +1180,11 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>   	rcu_read_unlock();
>   	return NETDEV_TX_OK;
>   
> -drop:
> -	this_cpu_inc(tun->pcpu_stats->tx_dropped);
> +err_out:
>   	skb_tx_error(skb);
>   	kfree_skb(skb);
> +drop:
> +	this_cpu_inc(tun->pcpu_stats->tx_dropped);
>   	rcu_read_unlock();
>   	return NET_XMIT_DROP;
>   }
David Ahern March 2, 2020, 3:41 a.m. UTC | #2
On 3/1/20 8:28 PM, Jason Wang wrote:
>> +static u32 tun_do_xdp_tx_generic(struct tun_struct *tun,
>> +                 struct net_device *dev,
>> +                 struct sk_buff *skb)
>> +{
>> +    struct bpf_prog *xdp_prog;
>> +    u32 act = XDP_PASS;
>> +
>> +    xdp_prog = rcu_dereference(tun->xdp_egress_prog);
>> +    if (xdp_prog) {
>> +        struct xdp_txq_info txq = { .dev = dev };
>> +        struct xdp_buff xdp;
>> +
>> +        skb = tun_prepare_xdp_skb(skb);
>> +        if (!skb) {
>> +            act = XDP_DROP;
>> +            goto out;
>> +        }
>> +
>> +        xdp.txq = &txq;
>> +
>> +        act = do_xdp_generic_core(skb, &xdp, xdp_prog);
>> +        switch (act) {
>> +        case XDP_TX:    /* for Tx path, XDP_TX == XDP_PASS */
>> +            act = XDP_PASS;
>> +            break;
> 
> 
> Jute a note here, I agree for TX XDP it may be better to do this.
> 
> But for offloaded program we need different semantic. Or we can deal
> this with attach types?
> 

This path is for XDP_FLAGS_DRV_MODE. Offloaded programs
(XDP_FLAGS_HW_MODE) will be run in vhost context.
Jesper Dangaard Brouer March 3, 2020, 10:46 a.m. UTC | #3
On Wed, 26 Feb 2020 20:20:10 -0700
David Ahern <dsahern@kernel.org> wrote:

> +static u32 tun_do_xdp_tx_generic(struct tun_struct *tun,
> +				 struct net_device *dev,
> +				 struct sk_buff *skb)
> +{
> +	struct bpf_prog *xdp_prog;
> +	u32 act = XDP_PASS;
> +
> +	xdp_prog = rcu_dereference(tun->xdp_egress_prog);
> +	if (xdp_prog) {
> +		struct xdp_txq_info txq = { .dev = dev };
> +		struct xdp_buff xdp;
> +
> +		skb = tun_prepare_xdp_skb(skb);
> +		if (!skb) {
> +			act = XDP_DROP;
> +			goto out;
> +		}
> +
> +		xdp.txq = &txq;
> +
> +		act = do_xdp_generic_core(skb, &xdp, xdp_prog);
> +		switch (act) {
> +		case XDP_TX:    /* for Tx path, XDP_TX == XDP_PASS */
> +			act = XDP_PASS;
> +			break;
> +		case XDP_PASS:
> +			break;
> +		case XDP_REDIRECT:
> +			/* fall through */
> +		default:
> +			bpf_warn_invalid_xdp_action(act);
> +			/* fall through */
> +		case XDP_ABORTED:
> +			trace_xdp_exception(tun->dev, xdp_prog, act);

Hmm, don't we need to extend the trace_xdp_exception() to give users a
hint that this happened on the TX/egress path?

> +			/* fall through */
> +		case XDP_DROP:
> +			break;
> +		}
> +	}
> +
> +out:
> +	return act;
> +}
David Ahern March 3, 2020, 3:36 p.m. UTC | #4
On 3/3/20 3:46 AM, Jesper Dangaard Brouer wrote:
> On Wed, 26 Feb 2020 20:20:10 -0700
> David Ahern <dsahern@kernel.org> wrote:
> 
>> +static u32 tun_do_xdp_tx_generic(struct tun_struct *tun,
>> +				 struct net_device *dev,
>> +				 struct sk_buff *skb)
>> +{
>> +	struct bpf_prog *xdp_prog;
>> +	u32 act = XDP_PASS;
>> +
>> +	xdp_prog = rcu_dereference(tun->xdp_egress_prog);
>> +	if (xdp_prog) {
>> +		struct xdp_txq_info txq = { .dev = dev };
>> +		struct xdp_buff xdp;
>> +
>> +		skb = tun_prepare_xdp_skb(skb);
>> +		if (!skb) {
>> +			act = XDP_DROP;
>> +			goto out;
>> +		}
>> +
>> +		xdp.txq = &txq;
>> +
>> +		act = do_xdp_generic_core(skb, &xdp, xdp_prog);
>> +		switch (act) {
>> +		case XDP_TX:    /* for Tx path, XDP_TX == XDP_PASS */
>> +			act = XDP_PASS;
>> +			break;
>> +		case XDP_PASS:
>> +			break;
>> +		case XDP_REDIRECT:
>> +			/* fall through */
>> +		default:
>> +			bpf_warn_invalid_xdp_action(act);
>> +			/* fall through */
>> +		case XDP_ABORTED:
>> +			trace_xdp_exception(tun->dev, xdp_prog, act);
> 
> Hmm, don't we need to extend the trace_xdp_exception() to give users a
> hint that this happened on the TX/egress path?

tracepoint has the program id, unsupported action and device. Seems like
the program id is sufficient. I do need to update libbpf to account for
the attach type.
diff mbox series

Patch

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 6aae398b904b..dcae6521a39d 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1059,6 +1059,63 @@  static unsigned int run_ebpf_filter(struct tun_struct *tun,
 	return len;
 }
 
+static struct sk_buff *tun_prepare_xdp_skb(struct sk_buff *skb)
+{
+	if (skb_shared(skb) || skb_cloned(skb)) {
+		struct sk_buff *nskb;
+
+		nskb = skb_copy(skb, GFP_ATOMIC);
+		consume_skb(skb);
+		return nskb;
+	}
+
+	return skb;
+}
+
+static u32 tun_do_xdp_tx_generic(struct tun_struct *tun,
+				 struct net_device *dev,
+				 struct sk_buff *skb)
+{
+	struct bpf_prog *xdp_prog;
+	u32 act = XDP_PASS;
+
+	xdp_prog = rcu_dereference(tun->xdp_egress_prog);
+	if (xdp_prog) {
+		struct xdp_txq_info txq = { .dev = dev };
+		struct xdp_buff xdp;
+
+		skb = tun_prepare_xdp_skb(skb);
+		if (!skb) {
+			act = XDP_DROP;
+			goto out;
+		}
+
+		xdp.txq = &txq;
+
+		act = do_xdp_generic_core(skb, &xdp, xdp_prog);
+		switch (act) {
+		case XDP_TX:    /* for Tx path, XDP_TX == XDP_PASS */
+			act = XDP_PASS;
+			break;
+		case XDP_PASS:
+			break;
+		case XDP_REDIRECT:
+			/* fall through */
+		default:
+			bpf_warn_invalid_xdp_action(act);
+			/* fall through */
+		case XDP_ABORTED:
+			trace_xdp_exception(tun->dev, xdp_prog, act);
+			/* fall through */
+		case XDP_DROP:
+			break;
+		}
+	}
+
+out:
+	return act;
+}
+
 /* Net device start xmit */
 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
@@ -1066,6 +1123,7 @@  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	int txq = skb->queue_mapping;
 	struct tun_file *tfile;
 	int len = skb->len;
+	u32 act;
 
 	rcu_read_lock();
 	tfile = rcu_dereference(tun->tfiles[txq]);
@@ -1107,9 +1165,13 @@  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	nf_reset_ct(skb);
 
-	if (ptr_ring_produce(&tfile->tx_ring, skb))
+	act = tun_do_xdp_tx_generic(tun, dev, skb);
+	if (act != XDP_PASS)
 		goto drop;
 
+	if (ptr_ring_produce(&tfile->tx_ring, skb))
+		goto err_out;
+
 	/* Notify and wake up reader process */
 	if (tfile->flags & TUN_FASYNC)
 		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
@@ -1118,10 +1180,11 @@  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 	return NETDEV_TX_OK;
 
-drop:
-	this_cpu_inc(tun->pcpu_stats->tx_dropped);
+err_out:
 	skb_tx_error(skb);
 	kfree_skb(skb);
+drop:
+	this_cpu_inc(tun->pcpu_stats->tx_dropped);
 	rcu_read_unlock();
 	return NET_XMIT_DROP;
 }