[net-next,V2,2/5] bpf: XDP_REDIRECT enable use of cpumap

Message ID: 150670285728.23765.652894515383691347.stgit@firesoul
State: Changes Requested, archived
Delegated to: David Miller
Series: New bpf cpumap type for XDP_REDIRECT

Commit Message

Jesper Dangaard Brouer Sept. 29, 2017, 4:34 p.m. UTC
This patch connects cpumap to the xdp_do_redirect_map infrastructure.
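
For context, a minimal sketch of how an XDP program could select a
cpumap via bpf_redirect_map(); the map layout, section/program names
and the fixed destination CPU below are illustrative, not part of
this patch:

    #include <uapi/linux/bpf.h>
    #include "bpf_helpers.h"    /* samples/bpf style SEC() and map def */

    /* Map value is the queue size used for the remote CPU's ring */
    struct bpf_map_def SEC("maps") cpu_map = {
            .type        = BPF_MAP_TYPE_CPUMAP,
            .key_size    = sizeof(__u32),
            .value_size  = sizeof(__u32),
            .max_entries = 64,
    };

    SEC("xdp_redirect_cpu")
    int xdp_prog(struct xdp_md *ctx)
    {
            __u32 dest_cpu = 0; /* hypothetical: steer all frames to CPU 0 */

            return bpf_redirect_map(&cpu_map, dest_cpu, 0);
    }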

No SKB allocations are done yet.  The XDP frames are transferred
to the other CPU, but they are simply refcnt decremented on the remote
CPU.  This served as a good benchmark for measuring the overhead of
the remote refcnt decrement.  If the driver's page recycle cache is
not efficient, this exposes a bottleneck in the page allocator.

A shout-out to MST's ptr_ring, which is the secret behind transferring
memory pointers between CPUs so efficiently, without constantly
bouncing cache-lines between CPUs.
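
The core ptr_ring pattern cpumap builds on, as a hedged sketch (the
ring and function names here are illustrative; the real queue handling
lives in kernel/bpf/cpumap.c):

    #include <linux/ptr_ring.h>

    static struct ptr_ring frame_ring;

    /* One-time setup: a 512-slot ring of opaque pointers */
    static int ring_setup(void)
    {
            return ptr_ring_init(&frame_ring, 512, GFP_KERNEL);
    }

    /* RX CPU side: hand off a frame pointer, -ENOSPC if the ring is full */
    static int enqueue_frame(void *frame)
    {
            return ptr_ring_produce(&frame_ring, frame);
    }

    /* Remote CPU kthread side: returns NULL when the ring is empty */
    static void *dequeue_frame(void)
    {
            return ptr_ring_consume(&frame_ring);
    }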

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/linux/bpf.h        |    7 +++++
 include/trace/events/xdp.h |   10 +++++--
 kernel/bpf/cpumap.c        |    5 ++-
 kernel/bpf/verifier.c      |    3 +-
 net/core/filter.c          |   65 +++++++++++++++++++++++++++++++++++++++-----
 5 files changed, 77 insertions(+), 13 deletions(-)

Comments

kernel test robot Oct. 1, 2017, 12:13 a.m. UTC | #1
Hi Jesper,

[auto build test ERROR on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Jesper-Dangaard-Brouer/New-bpf-cpumap-type-for-XDP_REDIRECT/20171001-064716
config: i386-randconfig-i0-201740 (attached as .config)
compiler: gcc-4.8 (Debian 4.8.4-1) 4.8.4
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All error/warnings (new ones prefixed by >>):

   net//core/filter.c: In function '__bpf_tx_xdp_map':
>> net//core/filter.c:2550:3: error: implicit declaration of function 'cpu_map_enqueue' [-Werror=implicit-function-declaration]
      err = cpu_map_enqueue(rcpu, xdp, dev_rx);
      ^
>> net//core/filter.c:2553:3: error: implicit declaration of function '__cpu_map_insert_ctx' [-Werror=implicit-function-declaration]
      __cpu_map_insert_ctx(map, index);
      ^
   net//core/filter.c: In function 'xdp_do_flush_map':
>> net//core/filter.c:2570:4: error: implicit declaration of function '__cpu_map_flush' [-Werror=implicit-function-declaration]
       __cpu_map_flush(map);
       ^
   net//core/filter.c: In function '__xdp_map_lookup_elem':
>> net//core/filter.c:2585:3: error: implicit declaration of function '__cpu_map_lookup_elem' [-Werror=implicit-function-declaration]
      return __cpu_map_lookup_elem(map, index);
      ^
>> net//core/filter.c:2585:3: warning: return makes pointer from integer without a cast [enabled by default]
   cc1: some warnings being treated as errors

vim +/cpu_map_enqueue +2550 net//core/filter.c

  2527	
  2528	static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
  2529				    struct bpf_map *map,
  2530				    struct xdp_buff *xdp,
  2531				    u32 index)
  2532	{
  2533		int err;
  2534	
  2535		if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
  2536			struct net_device *dev = fwd;
  2537	
  2538			if (!dev->netdev_ops->ndo_xdp_xmit) {
  2539				return -EOPNOTSUPP;
  2540			}
  2541	
  2542			err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
  2543			if (err)
  2544				return err;
  2545			__dev_map_insert_ctx(map, index);
  2546	
  2547		} else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
  2548			struct bpf_cpu_map_entry *rcpu = fwd;
  2549	
> 2550			err = cpu_map_enqueue(rcpu, xdp, dev_rx);
  2551			if (err)
  2552				return err;
> 2553			__cpu_map_insert_ctx(map, index);
  2554		}
  2555		return 0;
  2556	}
  2557	
  2558	void xdp_do_flush_map(void)
  2559	{
  2560		struct redirect_info *ri = this_cpu_ptr(&redirect_info);
  2561		struct bpf_map *map = ri->map_to_flush;
  2562	
  2563		ri->map_to_flush = NULL;
  2564		if (map) {
  2565			switch (map->map_type) {
  2566			case BPF_MAP_TYPE_DEVMAP:
  2567				__dev_map_flush(map);
  2568				break;
  2569			case BPF_MAP_TYPE_CPUMAP:
> 2570				__cpu_map_flush(map);
  2571				break;
  2572			default:
  2573				break;
  2574			}
  2575		}
  2576	}
  2577	EXPORT_SYMBOL_GPL(xdp_do_flush_map);
  2578	
  2579	static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
  2580	{
  2581		switch (map->map_type) {
  2582		case BPF_MAP_TYPE_DEVMAP:
  2583			return __dev_map_lookup_elem(map, index);
  2584		case BPF_MAP_TYPE_CPUMAP:
> 2585			return __cpu_map_lookup_elem(map, index);
  2586		default:
  2587			return NULL;
  2588		}
  2589	}
  2590	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
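
The implicit-declaration errors above point at a config where
CONFIG_BPF_SYSCALL is disabled, so the new cpumap prototypes added to
<linux/bpf.h> (inside the CONFIG_BPF_SYSCALL block, next to the devmap
ones) are never seen.  A sketch of the conventional fix, mirroring the
existing devmap stubs; this is hedged, the actual respin may differ:

    #ifndef CONFIG_BPF_SYSCALL
    /* Forward declaration so the stubs compile without the real type */
    struct bpf_cpu_map_entry;

    static inline
    struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map,
                                                    u32 key)
    {
            return NULL;
    }

    static inline void __cpu_map_insert_ctx(struct bpf_map *map, u32 index)
    {
    }

    static inline void __cpu_map_flush(struct bpf_map *map)
    {
    }

    struct xdp_buff;
    static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
                                      struct xdp_buff *xdp,
                                      struct net_device *dev_rx)
    {
            return 0;
    }
    #endif /* CONFIG_BPF_SYSCALL */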

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2b672c50f160..7f70b03e7426 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -317,6 +317,13 @@  struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
 void __dev_map_flush(struct bpf_map *map);
 
+struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
+void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
+void __cpu_map_flush(struct bpf_map *map);
+struct xdp_buff;
+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
+		    struct net_device *dev_rx);
+
 /* Return map's numa specified by userspace */
 static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
 {
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 4e16c43fba10..eb2ece96c1a2 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -136,12 +136,18 @@  DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
 		  __entry->map_id, __entry->map_index)
 );
 
+#define devmap_ifindex(fwd, map)				\
+	(!fwd ? 0 :						\
+	 (!map ? 0 :						\
+	  ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
+	   ((struct net_device *)fwd)->ifindex : 0)))
+
 #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)		\
-	 trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0,	\
+	 trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map),	\
 				0, map, idx)
 
 #define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err)	\
-	 trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0,	\
+	 trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map),	\
 				    err, map, idx)
 
 #endif /* _TRACE_XDP_H */
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 94fe2047e264..9a085f17e387 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -496,7 +496,8 @@  static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
 	return 0;
 }
 
-int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp)
+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
+		    struct net_device *dev_rx)
 {
 	struct xdp_pkt *xdp_pkt;
 	int headroom;
@@ -508,7 +509,7 @@  int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp)
 	xdp_pkt = xdp->data_hard_start;
 	xdp_pkt->data = xdp->data;
 	xdp_pkt->len  = xdp->data_end - xdp->data;
-	xdp_pkt->headroom = headroom;
+	xdp_pkt->headroom = headroom - sizeof(*xdp_pkt);
 	/* For now this is just used as a void pointer to data_hard_start */
 
 	bq_enqueue(rcpu, xdp_pkt);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f849eca36052..a712c7431c2d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1589,7 +1589,8 @@  static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 			goto error;
 		break;
 	case BPF_FUNC_redirect_map:
-		if (map->map_type != BPF_MAP_TYPE_DEVMAP)
+		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
+		    map->map_type != BPF_MAP_TYPE_CPUMAP)
 			goto error;
 		break;
 	case BPF_FUNC_sk_redirect_map:
diff --git a/net/core/filter.c b/net/core/filter.c
index 9b6e7e84aafd..37fe9e631ee4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2521,10 +2521,37 @@  static int __bpf_tx_xdp(struct net_device *dev,
 	err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
 	if (err)
 		return err;
-	if (map)
+	dev->netdev_ops->ndo_xdp_flush(dev);
+	return 0;
+}
+
+static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
+			    struct bpf_map *map,
+			    struct xdp_buff *xdp,
+			    u32 index)
+{
+	int err;
+
+	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+		struct net_device *dev = fwd;
+
+		if (!dev->netdev_ops->ndo_xdp_xmit) {
+			return -EOPNOTSUPP;
+		}
+
+		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+		if (err)
+			return err;
 		__dev_map_insert_ctx(map, index);
-	else
-		dev->netdev_ops->ndo_xdp_flush(dev);
+
+	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+		struct bpf_cpu_map_entry *rcpu = fwd;
+
+		err = cpu_map_enqueue(rcpu, xdp, dev_rx);
+		if (err)
+			return err;
+		__cpu_map_insert_ctx(map, index);
+	}
 	return 0;
 }
 
@@ -2534,11 +2561,33 @@  void xdp_do_flush_map(void)
 	struct bpf_map *map = ri->map_to_flush;
 
 	ri->map_to_flush = NULL;
-	if (map)
-		__dev_map_flush(map);
+	if (map) {
+		switch (map->map_type) {
+		case BPF_MAP_TYPE_DEVMAP:
+			__dev_map_flush(map);
+			break;
+		case BPF_MAP_TYPE_CPUMAP:
+			__cpu_map_flush(map);
+			break;
+		default:
+			break;
+		}
+	}
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush_map);
 
+static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
+{
+	switch (map->map_type) {
+	case BPF_MAP_TYPE_DEVMAP:
+		return __dev_map_lookup_elem(map, index);
+	case BPF_MAP_TYPE_CPUMAP:
+		return __cpu_map_lookup_elem(map, index);
+	default:
+		return NULL;
+	}
+}
+
 static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
 				   unsigned long aux)
 {
@@ -2551,8 +2600,8 @@  static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
-	struct net_device *fwd = NULL;
 	u32 index = ri->ifindex;
+	void *fwd = NULL;
 	int err;
 
 	ri->ifindex = 0;
@@ -2565,7 +2614,7 @@  static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 		goto err;
 	}
 
-	fwd = __dev_map_lookup_elem(map, index);
+	fwd = __xdp_map_lookup_elem(map, index);
 	if (!fwd) {
 		err = -EINVAL;
 		goto err;
@@ -2573,7 +2622,7 @@  static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 	if (ri->map_to_flush && ri->map_to_flush != map)
 		xdp_do_flush_map();
 
-	err = __bpf_tx_xdp(fwd, map, xdp, index);
+	err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
 	if (unlikely(err))
 		goto err;