diff mbox series

[v3,bpf-next,5/8] veth: Add ndo_xdp_xmit

Message ID 20180722151308.5480-6-toshiaki.makita1@gmail.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series veth: Driver XDP | expand

Commit Message

Toshiaki Makita July 22, 2018, 3:13 p.m. UTC
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>

This allows NIC's XDP to redirect packets to veth. The destination veth
device enqueues redirected packets to the napi ring of its peer, then
they are processed by XDP on its peer veth device.
This can be thought of as calling another XDP program from an XDP program
using REDIRECT, when the peer enables driver XDP.

Note that when the peer veth device does not set driver xdp, redirected
packets will be dropped because the peer is not ready for NAPI.

v2:
- Drop the part converting xdp_frame into skb when XDP is not enabled.
- Implement bulk interface of ndo_xdp_xmit.
- Implement XDP_XMIT_FLUSH bit and drop ndo_xdp_flush.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
---
 drivers/net/veth.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

Comments

kernel test robot July 24, 2018, 12:19 a.m. UTC | #1
Hi Toshiaki,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Toshiaki-Makita/veth-Driver-XDP/20180724-065517
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: i386-randconfig-x001-201829 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:10:0,
                    from include/linux/list.h:9,
                    from include/linux/timer.h:5,
                    from include/linux/netdevice.h:28,
                    from drivers//net/veth.c:11:
   drivers//net/veth.c: In function 'veth_xdp_xmit':
>> drivers//net/veth.c:300:16: error: implicit declaration of function 'xdp_ok_fwd_dev' [-Werror=implicit-function-declaration]
      if (unlikely(xdp_ok_fwd_dev(rcv, frame->len) ||
                   ^
   include/linux/compiler.h:77:42: note: in definition of macro 'unlikely'
    # define unlikely(x) __builtin_expect(!!(x), 0)
                                             ^
   cc1: some warnings being treated as errors

vim +/xdp_ok_fwd_dev +300 drivers//net/veth.c

   275	
   276	static int veth_xdp_xmit(struct net_device *dev, int n,
   277				 struct xdp_frame **frames, u32 flags)
   278	{
   279		struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
   280		struct net_device *rcv;
   281		int i, drops = 0;
   282	
   283		if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
   284			return -EINVAL;
   285	
   286		rcv = rcu_dereference(priv->peer);
   287		if (unlikely(!rcv))
   288			return -ENXIO;
   289	
   290		rcv_priv = netdev_priv(rcv);
   291		/* xdp_ring is initialized on receive side? */
   292		if (!rcu_access_pointer(rcv_priv->xdp_prog))
   293			return -ENXIO;
   294	
   295		spin_lock(&rcv_priv->xdp_ring.producer_lock);
   296		for (i = 0; i < n; i++) {
   297			struct xdp_frame *frame = frames[i];
   298			void *ptr = veth_xdp_to_ptr(frame);
   299	
 > 300			if (unlikely(xdp_ok_fwd_dev(rcv, frame->len) ||
   301				     __ptr_ring_produce(&rcv_priv->xdp_ring, ptr))) {
   302				xdp_return_frame_rx_napi(frame);
   303				drops++;
   304			}
   305		}
   306		spin_unlock(&rcv_priv->xdp_ring.producer_lock);
   307	
   308		if (flags & XDP_XMIT_FLUSH)
   309			__veth_xdp_flush(rcv_priv);
   310	
   311		return n - drops;
   312	}
   313	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot July 24, 2018, 12:33 a.m. UTC | #2
Hi Toshiaki,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Toshiaki-Makita/veth-Driver-XDP/20180724-065517
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: x86_64-randconfig-x010-201829 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   In file included from include/linux/kernel.h:10:0,
                    from include/linux/list.h:9,
                    from include/linux/timer.h:5,
                    from include/linux/netdevice.h:28,
                    from drivers//net/veth.c:11:
   drivers//net/veth.c: In function 'veth_xdp_xmit':
   drivers//net/veth.c:300:16: error: implicit declaration of function 'xdp_ok_fwd_dev' [-Werror=implicit-function-declaration]
      if (unlikely(xdp_ok_fwd_dev(rcv, frame->len) ||
                   ^
   include/linux/compiler.h:58:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
>> drivers//net/veth.c:300:3: note: in expansion of macro 'if'
      if (unlikely(xdp_ok_fwd_dev(rcv, frame->len) ||
      ^~
   include/linux/compiler.h:48:24: note: in expansion of macro '__branch_check__'
    #  define unlikely(x) (__branch_check__(x, 0, __builtin_constant_p(x)))
                           ^~~~~~~~~~~~~~~~
>> drivers//net/veth.c:300:7: note: in expansion of macro 'unlikely'
      if (unlikely(xdp_ok_fwd_dev(rcv, frame->len) ||
          ^~~~~~~~
   cc1: some warnings being treated as errors

vim +/if +300 drivers//net/veth.c

   275	
   276	static int veth_xdp_xmit(struct net_device *dev, int n,
   277				 struct xdp_frame **frames, u32 flags)
   278	{
   279		struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
   280		struct net_device *rcv;
   281		int i, drops = 0;
   282	
   283		if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
   284			return -EINVAL;
   285	
   286		rcv = rcu_dereference(priv->peer);
   287		if (unlikely(!rcv))
   288			return -ENXIO;
   289	
   290		rcv_priv = netdev_priv(rcv);
   291		/* xdp_ring is initialized on receive side? */
   292		if (!rcu_access_pointer(rcv_priv->xdp_prog))
   293			return -ENXIO;
   294	
   295		spin_lock(&rcv_priv->xdp_ring.producer_lock);
   296		for (i = 0; i < n; i++) {
   297			struct xdp_frame *frame = frames[i];
   298			void *ptr = veth_xdp_to_ptr(frame);
   299	
 > 300			if (unlikely(xdp_ok_fwd_dev(rcv, frame->len) ||
   301				     __ptr_ring_produce(&rcv_priv->xdp_ring, ptr))) {
   302				xdp_return_frame_rx_napi(frame);
   303				drops++;
   304			}
   305		}
   306		spin_unlock(&rcv_priv->xdp_ring.producer_lock);
   307	
   308		if (flags & XDP_XMIT_FLUSH)
   309			__veth_xdp_flush(rcv_priv);
   310	
   311		return n - drops;
   312	}
   313	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Jakub Kicinski July 24, 2018, 1:02 a.m. UTC | #3
On Mon, 23 Jul 2018 00:13:05 +0900, Toshiaki Makita wrote:
> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> 
> This allows NIC's XDP to redirect packets to veth. The destination veth
> device enqueues redirected packets to the napi ring of its peer, then
> they are processed by XDP on its peer veth device.
> This can be thought as calling another XDP program by XDP program using
> REDIRECT, when the peer enables driver XDP.
> 
> Note that when the peer veth device does not set driver xdp, redirected
> packets will be dropped because the peer is not ready for NAPI.

Often we can't redirect to devices which don't have an XDP program
installed.  In your case we can't redirect unless the peer of the
target has a program installed?  :(

Perhaps it is time to reconsider what Saeed once asked for, a flag or
attribute to enable being the destination of a XDP_REDIRECT.

> v2:
> - Drop the part converting xdp_frame into skb when XDP is not enabled.
> - Implement bulk interface of ndo_xdp_xmit.
> - Implement XDP_XMIT_FLUSH bit and drop ndo_xdp_flush.
> 
> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> ---
>  drivers/net/veth.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 45 insertions(+)
> 
> diff --git a/drivers/net/veth.c b/drivers/net/veth.c
> index 4be75c58bc6a..57187e955fea 100644
> --- a/drivers/net/veth.c
> +++ b/drivers/net/veth.c
> @@ -17,6 +17,7 @@
>  #include <net/rtnetlink.h>
>  #include <net/dst.h>
>  #include <net/xfrm.h>
> +#include <net/xdp.h>
>  #include <linux/veth.h>
>  #include <linux/module.h>
>  #include <linux/bpf.h>
> @@ -125,6 +126,11 @@ static void *veth_ptr_to_xdp(void *ptr)
>  	return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
>  }
>  
> +static void *veth_xdp_to_ptr(void *ptr)
> +{
> +	return (void *)((unsigned long)ptr | VETH_XDP_FLAG);
> +}
> +
>  static void veth_ptr_free(void *ptr)
>  {
>  	if (veth_is_xdp_frame(ptr))
> @@ -267,6 +273,44 @@ static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
>  	return skb;
>  }
>  
> +static int veth_xdp_xmit(struct net_device *dev, int n,
> +			 struct xdp_frame **frames, u32 flags)
> +{
> +	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
> +	struct net_device *rcv;
> +	int i, drops = 0;
> +
> +	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> +		return -EINVAL;
> +
> +	rcv = rcu_dereference(priv->peer);
> +	if (unlikely(!rcv))
> +		return -ENXIO;
> +
> +	rcv_priv = netdev_priv(rcv);
> +	/* xdp_ring is initialized on receive side? */
> +	if (!rcu_access_pointer(rcv_priv->xdp_prog))
> +		return -ENXIO;
> +
> +	spin_lock(&rcv_priv->xdp_ring.producer_lock);
> +	for (i = 0; i < n; i++) {
> +		struct xdp_frame *frame = frames[i];
> +		void *ptr = veth_xdp_to_ptr(frame);
> +
> +		if (unlikely(xdp_ok_fwd_dev(rcv, frame->len) ||
> +			     __ptr_ring_produce(&rcv_priv->xdp_ring, ptr))) {

Would you mind sparing a few more words how this is safe vs the
.ndo_close() on the peer?  Personally I'm a bit uncomfortable with the
IFF_UP check in xdp_ok_fwd_dev(), I'm not sure what's supposed to
guarantee the device doesn't go down right after that check, or is
already down, but netdev->flags are not atomic...  

> +			xdp_return_frame_rx_napi(frame);
> +			drops++;
> +		}
> +	}
> +	spin_unlock(&rcv_priv->xdp_ring.producer_lock);
> +
> +	if (flags & XDP_XMIT_FLUSH)
> +		__veth_xdp_flush(rcv_priv);
> +
> +	return n - drops;
> +}
> +
>  static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
>  					struct xdp_frame *frame)
>  {
> @@ -760,6 +804,7 @@ static const struct net_device_ops veth_netdev_ops = {
>  	.ndo_features_check	= passthru_features_check,
>  	.ndo_set_rx_headroom	= veth_set_rx_headroom,
>  	.ndo_bpf		= veth_xdp,
> +	.ndo_xdp_xmit		= veth_xdp_xmit,
>  };
>  
>  #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
Toshiaki Makita July 24, 2018, 1:59 a.m. UTC | #4
On 2018/07/24 9:19, kbuild test robot wrote:
> Hi Toshiaki,
> 
> Thank you for the patch! Yet something to improve:
> 
> [auto build test ERROR on bpf-next/master]
> 
> url:    https://github.com/0day-ci/linux/commits/Toshiaki-Makita/veth-Driver-XDP/20180724-065517
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
> config: i386-randconfig-x001-201829 (attached as .config)
> compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
> reproduce:
>         # save the attached .config to linux build tree
>         make ARCH=i386 
> 
> All errors (new ones prefixed by >>):
> 
>    In file included from include/linux/kernel.h:10:0,
>                     from include/linux/list.h:9,
>                     from include/linux/timer.h:5,
>                     from include/linux/netdevice.h:28,
>                     from drivers//net/veth.c:11:
>    drivers//net/veth.c: In function 'veth_xdp_xmit':
>>> drivers//net/veth.c:300:16: error: implicit declaration of function 'xdp_ok_fwd_dev' [-Werror=implicit-function-declaration]
>       if (unlikely(xdp_ok_fwd_dev(rcv, frame->len) ||

This is because this series depends on commit d8d7218ad842 ("xdp:
XDP_REDIRECT should check IFF_UP and MTU") which is currently in DaveM's
net-next tree, as I noted in the cover letter.
Toshiaki Makita July 24, 2018, 2:11 a.m. UTC | #5
On 2018/07/24 10:02, Jakub Kicinski wrote:
> On Mon, 23 Jul 2018 00:13:05 +0900, Toshiaki Makita wrote:
>> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>>
>> This allows NIC's XDP to redirect packets to veth. The destination veth
>> device enqueues redirected packets to the napi ring of its peer, then
>> they are processed by XDP on its peer veth device.
>> This can be thought as calling another XDP program by XDP program using
>> REDIRECT, when the peer enables driver XDP.
>>
>> Note that when the peer veth device does not set driver xdp, redirected
>> packets will be dropped because the peer is not ready for NAPI.
> 
> Often we can't redirect to devices which don't have am xdp program
> installed.  In your case we can't redirect unless the peer of the
> target doesn't have a program installed?  :(

Right. I tried to avoid this case by converting xdp_frames to skb but
realized that should not be done.
https://patchwork.ozlabs.org/patch/903536/

> Perhaps it is time to reconsider what Saeed once asked for, a flag or
> attribute to enable being the destination of a XDP_REDIRECT.

Yes, something will be necessary. Jesper said Tariq had some ideas to
implement it.

> 
>> v2:
>> - Drop the part converting xdp_frame into skb when XDP is not enabled.
>> - Implement bulk interface of ndo_xdp_xmit.
>> - Implement XDP_XMIT_FLUSH bit and drop ndo_xdp_flush.
>>
>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>> ---
>>  drivers/net/veth.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 45 insertions(+)
>>
>> diff --git a/drivers/net/veth.c b/drivers/net/veth.c
>> index 4be75c58bc6a..57187e955fea 100644
>> --- a/drivers/net/veth.c
>> +++ b/drivers/net/veth.c
>> @@ -17,6 +17,7 @@
>>  #include <net/rtnetlink.h>
>>  #include <net/dst.h>
>>  #include <net/xfrm.h>
>> +#include <net/xdp.h>
>>  #include <linux/veth.h>
>>  #include <linux/module.h>
>>  #include <linux/bpf.h>
>> @@ -125,6 +126,11 @@ static void *veth_ptr_to_xdp(void *ptr)
>>  	return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
>>  }
>>  
>> +static void *veth_xdp_to_ptr(void *ptr)
>> +{
>> +	return (void *)((unsigned long)ptr | VETH_XDP_FLAG);
>> +}
>> +
>>  static void veth_ptr_free(void *ptr)
>>  {
>>  	if (veth_is_xdp_frame(ptr))
>> @@ -267,6 +273,44 @@ static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
>>  	return skb;
>>  }
>>  
>> +static int veth_xdp_xmit(struct net_device *dev, int n,
>> +			 struct xdp_frame **frames, u32 flags)
>> +{
>> +	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
>> +	struct net_device *rcv;
>> +	int i, drops = 0;
>> +
>> +	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
>> +		return -EINVAL;
>> +
>> +	rcv = rcu_dereference(priv->peer);
>> +	if (unlikely(!rcv))
>> +		return -ENXIO;
>> +
>> +	rcv_priv = netdev_priv(rcv);
>> +	/* xdp_ring is initialized on receive side? */
>> +	if (!rcu_access_pointer(rcv_priv->xdp_prog))
>> +		return -ENXIO;
>> +
>> +	spin_lock(&rcv_priv->xdp_ring.producer_lock);
>> +	for (i = 0; i < n; i++) {
>> +		struct xdp_frame *frame = frames[i];
>> +		void *ptr = veth_xdp_to_ptr(frame);
>> +
>> +		if (unlikely(xdp_ok_fwd_dev(rcv, frame->len) ||
>> +			     __ptr_ring_produce(&rcv_priv->xdp_ring, ptr))) {
> 
> Would you mind sparing a few more words how this is safe vs the
> .ndo_close() on the peer?  Personally I'm a bit uncomfortable with the
> IFF_UP check in xdp_ok_fwd_dev(), I'm not sure what's supposed to
> guarantee the device doesn't go down right after that check, or is
> already down, but netdev->flags are not atomic...  
> 
>> +			xdp_return_frame_rx_napi(frame);
>> +			drops++;
>> +		}
>> +	}
>> +	spin_unlock(&rcv_priv->xdp_ring.producer_lock);
>> +
>> +	if (flags & XDP_XMIT_FLUSH)
>> +		__veth_xdp_flush(rcv_priv);
>> +
>> +	return n - drops;
>> +}
>> +
>>  static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
>>  					struct xdp_frame *frame)
>>  {
>> @@ -760,6 +804,7 @@ static const struct net_device_ops veth_netdev_ops = {
>>  	.ndo_features_check	= passthru_features_check,
>>  	.ndo_set_rx_headroom	= veth_set_rx_headroom,
>>  	.ndo_bpf		= veth_xdp,
>> +	.ndo_xdp_xmit		= veth_xdp_xmit,
>>  };
>>  
>>  #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
> 
> 
>
Toshiaki Makita July 24, 2018, 2:24 a.m. UTC | #6
On 2018/07/24 10:02, Jakub Kicinski wrote:
> On Mon, 23 Jul 2018 00:13:05 +0900, Toshiaki Makita wrote:
>> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>>
>> This allows NIC's XDP to redirect packets to veth. The destination veth
>> device enqueues redirected packets to the napi ring of its peer, then
>> they are processed by XDP on its peer veth device.
>> This can be thought as calling another XDP program by XDP program using
>> REDIRECT, when the peer enables driver XDP.
>>
>> Note that when the peer veth device does not set driver xdp, redirected
>> packets will be dropped because the peer is not ready for NAPI.
...
>> +static int veth_xdp_xmit(struct net_device *dev, int n,
>> +			 struct xdp_frame **frames, u32 flags)
>> +{
>> +	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
>> +	struct net_device *rcv;
>> +	int i, drops = 0;
>> +
>> +	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
>> +		return -EINVAL;
>> +
>> +	rcv = rcu_dereference(priv->peer);
>> +	if (unlikely(!rcv))
>> +		return -ENXIO;
>> +
>> +	rcv_priv = netdev_priv(rcv);
>> +	/* xdp_ring is initialized on receive side? */
>> +	if (!rcu_access_pointer(rcv_priv->xdp_prog))
>> +		return -ENXIO;
>> +
>> +	spin_lock(&rcv_priv->xdp_ring.producer_lock);
>> +	for (i = 0; i < n; i++) {
>> +		struct xdp_frame *frame = frames[i];
>> +		void *ptr = veth_xdp_to_ptr(frame);
>> +
>> +		if (unlikely(xdp_ok_fwd_dev(rcv, frame->len) ||
>> +			     __ptr_ring_produce(&rcv_priv->xdp_ring, ptr))) {
> 
> Would you mind sparing a few more words how this is safe vs the
> .ndo_close() on the peer?  Personally I'm a bit uncomfortable with the
> IFF_UP check in xdp_ok_fwd_dev(), I'm not sure what's supposed to
> guarantee the device doesn't go down right after that check, or is
> already down, but netdev->flags are not atomic...  

Actually it is guarded by RCU. On closing the device, rcv_priv->xdp_prog
is set to NULL, and synchronize_net() is called from within
netif_napi_del(). Then the ptr_ring is cleaned up.
xdp_ok_fwd_dev() does the same check as the non-XDP case, but it may not
be appropriate because the IFF_UP check here is not usable, as you say.

> 
>> +			xdp_return_frame_rx_napi(frame);
>> +			drops++;
>> +		}
>> +	}
>> +	spin_unlock(&rcv_priv->xdp_ring.producer_lock);
>> +
>> +	if (flags & XDP_XMIT_FLUSH)
>> +		__veth_xdp_flush(rcv_priv);
>> +
>> +	return n - drops;
>> +}
>> +
>>  static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
>>  					struct xdp_frame *frame)
>>  {
>> @@ -760,6 +804,7 @@ static const struct net_device_ops veth_netdev_ops = {
>>  	.ndo_features_check	= passthru_features_check,
>>  	.ndo_set_rx_headroom	= veth_set_rx_headroom,
>>  	.ndo_bpf		= veth_xdp,
>> +	.ndo_xdp_xmit		= veth_xdp_xmit,
>>  };
>>  
>>  #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
> 
> 
>
Tariq Toukan July 24, 2018, 1:58 p.m. UTC | #7
On 24/07/2018 5:11 AM, Toshiaki Makita wrote:
> On 2018/07/24 10:02, Jakub Kicinski wrote:
>> On Mon, 23 Jul 2018 00:13:05 +0900, Toshiaki Makita wrote:
>>> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>>>
>>> This allows NIC's XDP to redirect packets to veth. The destination veth
>>> device enqueues redirected packets to the napi ring of its peer, then
>>> they are processed by XDP on its peer veth device.
>>> This can be thought as calling another XDP program by XDP program using
>>> REDIRECT, when the peer enables driver XDP.
>>>
>>> Note that when the peer veth device does not set driver xdp, redirected
>>> packets will be dropped because the peer is not ready for NAPI.
>>
>> Often we can't redirect to devices which don't have am xdp program
>> installed.  In your case we can't redirect unless the peer of the
>> target doesn't have a program installed?  :(
> 
> Right. I tried to avoid this case by converting xdp_frames to skb but
> realized that should not be done.
> https://patchwork.ozlabs.org/patch/903536/
> 
>> Perhaps it is time to reconsider what Saeed once asked for, a flag or
>> attribute to enable being the destination of a XDP_REDIRECT.
> 
> Yes, something will be necessary. Jesper said Tariq had some ideas to
> implement it.
> 

Yes, that bothered me as well.

I think that the driver-out capability of the XDP redirect is totally 
unrelated to any XDP program, and is a standalone feature that should be 
simply turned on/off just like any other performance feature, via 
ethtool -K.

I am going to push my driver implementation (mlx5) of XDP redirect 
driver-out side very soon (this week).
As you will see, it does not require loading any XDP program, and the 
feature will be always on (for now, until we add a control flow for it).

Later, we plan to push ethtool infrastructure and driver implementation 
to control the feature.

Thanks,
Tariq
diff mbox series

Patch

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 4be75c58bc6a..57187e955fea 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -17,6 +17,7 @@ 
 #include <net/rtnetlink.h>
 #include <net/dst.h>
 #include <net/xfrm.h>
+#include <net/xdp.h>
 #include <linux/veth.h>
 #include <linux/module.h>
 #include <linux/bpf.h>
@@ -125,6 +126,11 @@  static void *veth_ptr_to_xdp(void *ptr)
 	return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
 }
 
+static void *veth_xdp_to_ptr(void *ptr)
+{
+	return (void *)((unsigned long)ptr | VETH_XDP_FLAG);
+}
+
 static void veth_ptr_free(void *ptr)
 {
 	if (veth_is_xdp_frame(ptr))
@@ -267,6 +273,44 @@  static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
 	return skb;
 }
 
+static int veth_xdp_xmit(struct net_device *dev, int n,
+			 struct xdp_frame **frames, u32 flags)
+{
+	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
+	struct net_device *rcv;
+	int i, drops = 0;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	rcv = rcu_dereference(priv->peer);
+	if (unlikely(!rcv))
+		return -ENXIO;
+
+	rcv_priv = netdev_priv(rcv);
+	/* xdp_ring is initialized on receive side? */
+	if (!rcu_access_pointer(rcv_priv->xdp_prog))
+		return -ENXIO;
+
+	spin_lock(&rcv_priv->xdp_ring.producer_lock);
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *frame = frames[i];
+		void *ptr = veth_xdp_to_ptr(frame);
+
+		if (unlikely(xdp_ok_fwd_dev(rcv, frame->len) ||
+			     __ptr_ring_produce(&rcv_priv->xdp_ring, ptr))) {
+			xdp_return_frame_rx_napi(frame);
+			drops++;
+		}
+	}
+	spin_unlock(&rcv_priv->xdp_ring.producer_lock);
+
+	if (flags & XDP_XMIT_FLUSH)
+		__veth_xdp_flush(rcv_priv);
+
+	return n - drops;
+}
+
 static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
 					struct xdp_frame *frame)
 {
@@ -760,6 +804,7 @@  static const struct net_device_ops veth_netdev_ops = {
 	.ndo_features_check	= passthru_features_check,
 	.ndo_set_rx_headroom	= veth_set_rx_headroom,
 	.ndo_bpf		= veth_xdp,
+	.ndo_xdp_xmit		= veth_xdp_xmit,
 };
 
 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \