Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/819581/?format=api
{ "id": 819581, "url": "http://patchwork.ozlabs.org/api/patches/819581/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netdev/patch/150660343302.2808.13091201420033844465.stgit@firesoul/", "project": { "id": 7, "url": "http://patchwork.ozlabs.org/api/projects/7/?format=api", "name": "Linux network development", "link_name": "netdev", "list_id": "netdev.vger.kernel.org", "list_email": "netdev@vger.kernel.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<150660343302.2808.13091201420033844465.stgit@firesoul>", "list_archive_url": null, "date": "2017-09-28T12:57:13", "name": "[net-next,2/5] bpf: XDP_REDIRECT enable use of cpumap", "commit_ref": null, "pull_url": null, "state": "superseded", "archived": true, "hash": "7f653f730f4f578507e537b9394f4cb3fdff2a8c", "submitter": { "id": 13625, "url": "http://patchwork.ozlabs.org/api/people/13625/?format=api", "name": "Jesper Dangaard Brouer", "email": "brouer@redhat.com" }, "delegate": { "id": 34, "url": "http://patchwork.ozlabs.org/api/users/34/?format=api", "username": "davem", "first_name": "David", "last_name": "Miller", "email": "davem@davemloft.net" }, "mbox": "http://patchwork.ozlabs.org/project/netdev/patch/150660343302.2808.13091201420033844465.stgit@firesoul/mbox/", "series": [ { "id": 5560, "url": "http://patchwork.ozlabs.org/api/series/5560/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netdev/list/?series=5560", "date": "2017-09-28T12:57:02", "name": "New bpf cpumap type for XDP_REDIRECT", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/5560/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/819581/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/819581/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<netdev-owner@vger.kernel.org>", "X-Original-To": "patchwork-incoming@ozlabs.org", "Delivered-To": "patchwork-incoming@ozlabs.org", "Authentication-Results": [ "ozlabs.org;\n\tspf=none (mailfrom) smtp.mailfrom=vger.kernel.org\n\t(client-ip=209.132.180.67; helo=vger.kernel.org;\n\tenvelope-from=netdev-owner@vger.kernel.org;\n\treceiver=<UNKNOWN>)", "ext-mx01.extmail.prod.ext.phx2.redhat.com;\n\tdmarc=none (p=none dis=none) header.from=redhat.com", "ext-mx01.extmail.prod.ext.phx2.redhat.com;\n\tspf=fail smtp.mailfrom=brouer@redhat.com" ], "Received": [ "from vger.kernel.org (vger.kernel.org [209.132.180.67])\n\tby ozlabs.org (Postfix) with ESMTP id 3y2vq805kBz9t66\n\tfor <patchwork-incoming@ozlabs.org>;\n\tThu, 28 Sep 2017 22:57:24 +1000 (AEST)", "(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S1753113AbdI1M5W (ORCPT <rfc822;patchwork-incoming@ozlabs.org>);\n\tThu, 28 Sep 2017 08:57:22 -0400", "from mx1.redhat.com ([209.132.183.28]:7786 \"EHLO mx1.redhat.com\"\n\trhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP\n\tid S1753090AbdI1M5U (ORCPT <rfc822;netdev@vger.kernel.org>);\n\tThu, 28 Sep 2017 08:57:20 -0400", "from smtp.corp.redhat.com\n\t(int-mx02.intmail.prod.int.phx2.redhat.com [10.5.11.12])\n\t(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby mx1.redhat.com (Postfix) with ESMTPS id 2CE327CDBB;\n\tThu, 28 Sep 2017 12:57:20 +0000 (UTC)", "from firesoul.localdomain (ovpn-200-26.brq.redhat.com\n\t[10.40.200.26])\n\tby smtp.corp.redhat.com (Postfix) with ESMTP id EC11D86E6E;\n\tThu, 28 Sep 2017 12:57:13 +0000 (UTC)", "from [192.168.5.1] (localhost [IPv6:::1])\n\tby firesoul.localdomain (Postfix) with ESMTP id 191E9300006B0;\n\tThu, 28 Sep 2017 14:57:13 +0200 (CEST)" ], "DMARC-Filter": "OpenDMARC Filter v1.3.2 mx1.redhat.com 2CE327CDBB", "Subject": "[net-next PATCH 2/5] bpf: XDP_REDIRECT enable use of cpumap", "From": "Jesper Dangaard Brouer <brouer@redhat.com>", "To": "netdev@vger.kernel.org", "Cc": "jakub.kicinski@netronome.com, \"Michael S. Tsirkin\" <mst@redhat.com>,\n\tJason Wang <jasowang@redhat.com>, mchan@broadcom.com,\n\tJohn Fastabend <john.fastabend@gmail.com>, peter.waskiewicz.jr@intel.com,\n\tJesper Dangaard Brouer <brouer@redhat.com>,\n\tDaniel Borkmann <borkmann@iogearbox.net>,\n\tAlexei Starovoitov <alexei.starovoitov@gmail.com>,\n\tAndy Gospodarek <andy@greyhouse.net>", "Date": "Thu, 28 Sep 2017 14:57:13 +0200", "Message-ID": "<150660343302.2808.13091201420033844465.stgit@firesoul>", "In-Reply-To": "<150660339205.2808.7084136789768233829.stgit@firesoul>", "References": "<150660339205.2808.7084136789768233829.stgit@firesoul>", "User-Agent": "StGit/0.17.1-dirty", "MIME-Version": "1.0", "Content-Type": "text/plain; charset=\"utf-8\"", "Content-Transfer-Encoding": "7bit", "X-Scanned-By": "MIMEDefang 2.79 on 10.5.11.12", "X-Greylist": "Sender IP whitelisted, not delayed by milter-greylist-4.5.16\n\t(mx1.redhat.com [10.5.110.25]);\n\tThu, 28 Sep 2017 12:57:20 +0000 (UTC)", "Sender": "netdev-owner@vger.kernel.org", "Precedence": "bulk", "List-ID": "<netdev.vger.kernel.org>", "X-Mailing-List": "netdev@vger.kernel.org" }, "content": "This patch connects cpumap to the xdp_do_redirect_map infrastructure.\n\nStill no SKB allocation are done yet. The XDP frames are transferred\nto the other CPU, but they are simply refcnt decremented on the remote\nCPU. This served as a good benchmark for measuring the overhead of\nremote refcnt decrement. If driver page recycle cache is not\nefficient then this, exposes a bottleneck in the page allocator.\n\nA shout-out to MST's ptr_ring, which is the secret behind is being so\nefficient to transfer memory pointers between CPUs, without constantly\nbouncing cache-lines between CPUs.\n\nSigned-off-by: Jesper Dangaard Brouer <brouer@redhat.com>\n---\n include/linux/bpf.h | 7 +++++\n include/trace/events/xdp.h | 10 +++++--\n kernel/bpf/cpumap.c | 5 ++-\n kernel/bpf/verifier.c | 3 +-\n net/core/filter.c | 65 +++++++++++++++++++++++++++++++++++++++-----\n 5 files changed, 77 insertions(+), 13 deletions(-)", "diff": "diff --git a/include/linux/bpf.h b/include/linux/bpf.h\nindex 2b672c50f160..7f70b03e7426 100644\n--- a/include/linux/bpf.h\n+++ b/include/linux/bpf.h\n@@ -317,6 +317,13 @@ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);\n void __dev_map_insert_ctx(struct bpf_map *map, u32 index);\n void __dev_map_flush(struct bpf_map *map);\n \n+struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);\n+void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);\n+void __cpu_map_flush(struct bpf_map *map);\n+struct xdp_buff;\n+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,\n+\t\t struct net_device *dev_rx);\n+\n /* Return map's numa specified by userspace */\n static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)\n {\ndiff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h\nindex 4e16c43fba10..eb2ece96c1a2 100644\n--- a/include/trace/events/xdp.h\n+++ b/include/trace/events/xdp.h\n@@ -136,12 +136,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,\n \t\t __entry->map_id, __entry->map_index)\n );\n \n+#define devmap_ifindex(fwd, map)\t\t\t\t\\\n+\t(!fwd ? 0 :\t\t\t\t\t\t\\\n+\t (!map ? 0 :\t\t\t\t\t\t\\\n+\t ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?\t\t\\\n+\t ((struct net_device *)fwd)->ifindex : 0)))\n+\n #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)\t\t\\\n-\t trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0,\t\\\n+\t trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map),\t\\\n \t\t\t\t0, map, idx)\n \n #define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err)\t\\\n-\t trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0,\t\\\n+\t trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map),\t\\\n \t\t\t\t err, map, idx)\n \n #endif /* _TRACE_XDP_H */\ndiff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c\nindex f0948af82e65..ce2490ad860d 100644\n--- a/kernel/bpf/cpumap.c\n+++ b/kernel/bpf/cpumap.c\n@@ -488,7 +488,8 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)\n \treturn 0;\n }\n \n-int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp)\n+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,\n+\t\t struct net_device *dev_rx)\n {\n \tstruct xdp_pkt *xdp_pkt;\n \tint headroom;\n@@ -500,7 +501,7 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp)\n \txdp_pkt = xdp->data_hard_start;\n \txdp_pkt->data = xdp->data;\n \txdp_pkt->len = xdp->data_end - xdp->data;\n-\txdp_pkt->headroom = headroom;\n+\txdp_pkt->headroom = headroom - sizeof(*xdp_pkt);\n \t/* For now this is just used as a void pointer to data_hard_start */\n \n \tbq_enqueue(rcpu, xdp_pkt);\ndiff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c\nindex f849eca36052..a712c7431c2d 100644\n--- a/kernel/bpf/verifier.c\n+++ b/kernel/bpf/verifier.c\n@@ -1589,7 +1589,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)\n \t\t\tgoto error;\n \t\tbreak;\n \tcase BPF_FUNC_redirect_map:\n-\t\tif (map->map_type != BPF_MAP_TYPE_DEVMAP)\n+\t\tif (map->map_type != BPF_MAP_TYPE_DEVMAP &&\n+\t\t map->map_type != BPF_MAP_TYPE_CPUMAP)\n \t\t\tgoto error;\n \t\tbreak;\n \tcase BPF_FUNC_sk_redirect_map:\ndiff --git a/net/core/filter.c b/net/core/filter.c\nindex 9b6e7e84aafd..37fe9e631ee4 100644\n--- a/net/core/filter.c\n+++ b/net/core/filter.c\n@@ -2521,10 +2521,37 @@ static int __bpf_tx_xdp(struct net_device *dev,\n \terr = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);\n \tif (err)\n \t\treturn err;\n-\tif (map)\n+\tdev->netdev_ops->ndo_xdp_flush(dev);\n+\treturn 0;\n+}\n+\n+static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,\n+\t\t\t struct bpf_map *map,\n+\t\t\t struct xdp_buff *xdp,\n+\t\t\t u32 index)\n+{\n+\tint err;\n+\n+\tif (map->map_type == BPF_MAP_TYPE_DEVMAP) {\n+\t\tstruct net_device *dev = fwd;\n+\n+\t\tif (!dev->netdev_ops->ndo_xdp_xmit) {\n+\t\t\treturn -EOPNOTSUPP;\n+\t\t}\n+\n+\t\terr = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);\n+\t\tif (err)\n+\t\t\treturn err;\n \t\t__dev_map_insert_ctx(map, index);\n-\telse\n-\t\tdev->netdev_ops->ndo_xdp_flush(dev);\n+\n+\t} else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {\n+\t\tstruct bpf_cpu_map_entry *rcpu = fwd;\n+\n+\t\terr = cpu_map_enqueue(rcpu, xdp, dev_rx);\n+\t\tif (err)\n+\t\t\treturn err;\n+\t\t__cpu_map_insert_ctx(map, index);\n+\t}\n \treturn 0;\n }\n \n@@ -2534,11 +2561,33 @@ void xdp_do_flush_map(void)\n \tstruct bpf_map *map = ri->map_to_flush;\n \n \tri->map_to_flush = NULL;\n-\tif (map)\n-\t\t__dev_map_flush(map);\n+\tif (map) {\n+\t\tswitch (map->map_type) {\n+\t\tcase BPF_MAP_TYPE_DEVMAP:\n+\t\t\t__dev_map_flush(map);\n+\t\t\tbreak;\n+\t\tcase BPF_MAP_TYPE_CPUMAP:\n+\t\t\t__cpu_map_flush(map);\n+\t\t\tbreak;\n+\t\tdefault:\n+\t\t\tbreak;\n+\t\t}\n+\t}\n }\n EXPORT_SYMBOL_GPL(xdp_do_flush_map);\n \n+static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)\n+{\n+\tswitch (map->map_type) {\n+\tcase BPF_MAP_TYPE_DEVMAP:\n+\t\treturn __dev_map_lookup_elem(map, index);\n+\tcase BPF_MAP_TYPE_CPUMAP:\n+\t\treturn __cpu_map_lookup_elem(map, index);\n+\tdefault:\n+\t\treturn NULL;\n+\t}\n+}\n+\n static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,\n \t\t\t\t unsigned long aux)\n {\n@@ -2551,8 +2600,8 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,\n \tstruct redirect_info *ri = this_cpu_ptr(&redirect_info);\n \tunsigned long map_owner = ri->map_owner;\n \tstruct bpf_map *map = ri->map;\n-\tstruct net_device *fwd = NULL;\n \tu32 index = ri->ifindex;\n+\tvoid *fwd = NULL;\n \tint err;\n \n \tri->ifindex = 0;\n@@ -2565,7 +2614,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,\n \t\tgoto err;\n \t}\n \n-\tfwd = __dev_map_lookup_elem(map, index);\n+\tfwd = __xdp_map_lookup_elem(map, index);\n \tif (!fwd) {\n \t\terr = -EINVAL;\n \t\tgoto err;\n@@ -2573,7 +2622,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,\n \tif (ri->map_to_flush && ri->map_to_flush != map)\n \t\txdp_do_flush_map();\n \n-\terr = __bpf_tx_xdp(fwd, map, xdp, index);\n+\terr = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);\n \tif (unlikely(err))\n \t\tgoto err;\n \n", "prefixes": [ "net-next", "2/5" ] }