From patchwork Tue Apr 24 14:39:15 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Toshiaki Makita X-Patchwork-Id: 903535 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=gmail.com header.i=@gmail.com header.b="AQqQdP3p"; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40VmFR3k94z9ry1 for ; Wed, 25 Apr 2018 00:39:55 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754353AbeDXOjx (ORCPT ); Tue, 24 Apr 2018 10:39:53 -0400 Received: from mail-pf0-f195.google.com ([209.85.192.195]:42277 "EHLO mail-pf0-f195.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753873AbeDXOjm (ORCPT ); Tue, 24 Apr 2018 10:39:42 -0400 Received: by mail-pf0-f195.google.com with SMTP id o16so12428075pfk.9 for ; Tue, 24 Apr 2018 07:39:42 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=Ka0ZM5CLXuMLb1EDxJatXwxi10OkeGsdQMib7YdxCHM=; b=AQqQdP3pX+Nmi8VdfZnNZ3ymQawvgaJOUXty8jnRzPqC1z7eeOmsQ+1piN2GJUmwD8 Q/RlY8bxoRzmOSo+EmernhTjsCLRC6pUKS0irggN9QqROn5BYc2aA+wnHZDeZ08LlgVJ KruEj+1OU7WVroh8eOiwqpWC9WYnq1O+0YNrjWLovjsy139Ibe0/QAmFFuv6M9SRzArL xbnqp6WGZIZWAJhGbx+xN4Kl2ykk1hEV4gkROhG4VLYWqfHjNdazc0yNaAU5k7zXnveV 0Y6v3NKqN3wHDSgvQznEvJHPBJQSHXC9c/cUT/YroycCtvDDMORirPEyC6KJ9WjaPeYy Ygkg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; 
c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=Ka0ZM5CLXuMLb1EDxJatXwxi10OkeGsdQMib7YdxCHM=; b=gK+nqpAeSp7uKDKSBEa9nIxUrHr7mHsL6iMnBsSznvRh5maZekG9fhXJgrBfEsVJEV tbP951tPDdPd+E6PcEiiup92EHQFH3Nu+Xq9FbHPfDaqPku+W+paGFn1D197FqVHNsL9 iFoTlmXV+bZV0iFJf0tgFEFbwlzNFH8jDC8X6eLKd6V/8+zdu7jeJ4u3qcWfo7zaQHrb brS2Zb5HcdXT6AOOchrE0FGW47H/tutqkZZsOZyUGVkEdE8RqZef37Jwcn7Tm7mhH4OI 3HLb2j28d09GlXhO9+W2/VZ021DL8LUCJFAsLVPkKasTytGOWhPSY2LV4YdCWkzWnRn3 Nb+Q== X-Gm-Message-State: ALQs6tBeuvrzPxIMpOEO4JRjQPOT+3fmKH970ghf8ms6pR/uyKwjK7al 6XJcL0FseBCF5E/e3yLC5bjf6TZkWVk= X-Google-Smtp-Source: AIpwx481GJMMk/K33FKYb7kFX8gR+5GD7zArOznzzINNpJw065nFwa2FaO30lAqLMXrbUWu30yw+Tg== X-Received: by 10.98.118.130 with SMTP id r124mr24117655pfc.80.1524580782223; Tue, 24 Apr 2018 07:39:42 -0700 (PDT) Received: from localhost.localdomain (i121-115-166-6.s42.a013.ap.plala.or.jp. [121.115.166.6]) by smtp.gmail.com with ESMTPSA id o64sm28179970pfb.62.2018.04.24.07.39.40 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Tue, 24 Apr 2018 07:39:41 -0700 (PDT) From: Toshiaki Makita To: netdev@vger.kernel.org Cc: Toshiaki Makita Subject: [PATCH RFC 1/9] net: Export skb_headers_offset_update and skb_copy_header Date: Tue, 24 Apr 2018 23:39:15 +0900 Message-Id: <20180424143923.26519-2-toshiaki.makita1@gmail.com> X-Mailer: git-send-email 2.14.3 In-Reply-To: <20180424143923.26519-1-toshiaki.makita1@gmail.com> References: <20180424143923.26519-1-toshiaki.makita1@gmail.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Toshiaki Makita Signed-off-by: Toshiaki Makita --- include/linux/skbuff.h | 2 ++ net/core/skbuff.c | 12 +++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9065477ed255..fdf80a9d4582 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1030,6 +1030,8 @@ 
static inline struct sk_buff *alloc_skb_fclone(unsigned int size, } struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); +void skb_headers_offset_update(struct sk_buff *skb, int off); +void skb_copy_header(struct sk_buff *new, const struct sk_buff *old); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 345b51837ca8..531354900177 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1290,7 +1290,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) } EXPORT_SYMBOL(skb_clone); -static void skb_headers_offset_update(struct sk_buff *skb, int off) +void skb_headers_offset_update(struct sk_buff *skb, int off) { /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) @@ -1304,8 +1304,9 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off) skb->inner_network_header += off; skb->inner_mac_header += off; } +EXPORT_SYMBOL(skb_headers_offset_update); -static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) +void skb_copy_header(struct sk_buff *new, const struct sk_buff *old) { __copy_skb_header(new, old); @@ -1313,6 +1314,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } +EXPORT_SYMBOL(skb_copy_header); static inline int skb_alloc_rx_flag(const struct sk_buff *skb) { @@ -1355,7 +1357,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)); - copy_skb_header(n, skb); + skb_copy_header(n, skb); return n; } EXPORT_SYMBOL(skb_copy); @@ -1419,7 +1421,7 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, 
skb_clone_fraglist(n); } - copy_skb_header(n, skb); + skb_copy_header(n, skb); out: return n; } @@ -1599,7 +1601,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, skb->len + head_copy_len)); - copy_skb_header(n, skb); + skb_copy_header(n, skb); skb_headers_offset_update(n, newheadroom - oldheadroom); From patchwork Tue Apr 24 14:39:16 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Toshiaki Makita X-Patchwork-Id: 903534 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=gmail.com header.i=@gmail.com header.b="cGjz0ADV"; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40VmFQ180jz9ry1 for ; Wed, 25 Apr 2018 00:39:54 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754196AbeDXOju (ORCPT ); Tue, 24 Apr 2018 10:39:50 -0400 Received: from mail-pg0-f66.google.com ([74.125.83.66]:43469 "EHLO mail-pg0-f66.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753690AbeDXOjo (ORCPT ); Tue, 24 Apr 2018 10:39:44 -0400 Received: by mail-pg0-f66.google.com with SMTP id f132so11024302pgc.10 for ; Tue, 24 Apr 2018 07:39:44 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=94RuyL3125mHKsDk71P2MTaFHguqhHT8JxkiDaO3RGA=; 
b=cGjz0ADVBos9aGJmkLMZGawY3z48YAq+hCn9dLoDz0sRv1DyiO2UUu79uOyvmsB0Eu pMRXgmPsWR7XbNur2LUz1tb8bn8RPKbB1LYX4+hDGq5SxpzjOnW4a+huY+WvrS9t72fU nwzUDVPavR/5Q6ptGY+h8i8YtmhXZEh8drEw1fEQESeKnt3ExtbGoej91T86+wULQdft 5R7SKrcZUpSwiSx6xUfH4R3k3a4d6AAmwucc849jQP7LETjY2PM5LEGq3ecDRHdHqXbm kkJNgaLS/0XknP3T38BkoloMsXN6dmJeCQrxGOJLgYQjUJvHi9JAhAruJXQpjOzAJQhH hBXw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=94RuyL3125mHKsDk71P2MTaFHguqhHT8JxkiDaO3RGA=; b=GoIbTSnGKH7eE2FcUkza02YRDR+9l6j/A6paIMx/2cXLjIlhn7+gB5Iz2c1kE20162 wcT+7txcj8CmLCo+m5+bV9RtKDGrGo2xH4yMV/K+5nbOQJeO1ldrBgKQvGQ3ppUb3Sup u+StwAw6z1IzLUWHGfvAPR7A6Mz2ZMRGI9WSAQvBl8ttD5p8wh5P8FvgJ7l1DhiNerwl vQHSHzUfJHGN0hgDnxFCtG1BVjqnE1OrfBytEbtc8IKjpXM4SMgZuFC2R2XLkOz3YnM2 ZuZ74ROStLVWhKWkFXXABofl1ZKmb/CiooBe1ZD/JwtU3XK/iDliS302cPZk6Bul25CM vNQg== X-Gm-Message-State: ALQs6tAAZkjXLleFpAAcCG+Z/Rp4y3MAeGULQjpzGU3vkm4jCEc6cphG HdFU753+p8Uqh67bn8068svTzqczqtU= X-Google-Smtp-Source: AIpwx4/VrwKc06xrPsOoFrqgIXFKhGIBMmhQRZyBDUuT/EPi6xbtUyz3Lv4mqX5VXRAb2UrXbtGRqQ== X-Received: by 10.99.95.210 with SMTP id t201mr20967289pgb.315.1524580783886; Tue, 24 Apr 2018 07:39:43 -0700 (PDT) Received: from localhost.localdomain (i121-115-166-6.s42.a013.ap.plala.or.jp. 
[121.115.166.6]) by smtp.gmail.com with ESMTPSA id o64sm28179970pfb.62.2018.04.24.07.39.42 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Tue, 24 Apr 2018 07:39:43 -0700 (PDT) From: Toshiaki Makita To: netdev@vger.kernel.org Cc: Toshiaki Makita Subject: [PATCH RFC 2/9] veth: Add driver XDP Date: Tue, 24 Apr 2018 23:39:16 +0900 Message-Id: <20180424143923.26519-3-toshiaki.makita1@gmail.com> X-Mailer: git-send-email 2.14.3 In-Reply-To: <20180424143923.26519-1-toshiaki.makita1@gmail.com> References: <20180424143923.26519-1-toshiaki.makita1@gmail.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Toshiaki Makita This is a basic implementation of XDP in the veth driver. Incoming packets are sent from the peer veth device in the form of skbs, so this generally does the same thing as generic XDP. This is not very useful by itself, but it is a starting point for implementing other useful veth XDP features like TX and REDIRECT. Signed-off-by: Toshiaki Makita --- drivers/net/veth.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 205 insertions(+), 5 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index a69ad39ee57e..9c4197306716 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -19,10 +19,15 @@ #include #include #include +#include +#include +#include #define DRV_NAME "veth" #define DRV_VERSION "1.0" +#define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) + struct pcpu_vstats { u64 packets; u64 bytes; @@ -30,9 +35,11 @@ struct pcpu_vstats { }; struct veth_priv { + struct bpf_prog __rcu *xdp_prog; struct net_device __rcu *peer; atomic64_t dropped; unsigned requested_headroom; + struct xdp_rxq_info xdp_rxq; }; /* @@ -98,6 +105,25 @@ static const struct ethtool_ops veth_ethtool_ops = { .get_link_ksettings = veth_get_link_ksettings, }; +/* general routines */ + +static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev, + struct sk_buff *skb); + +static 
int veth_xdp_rx(struct net_device *dev, struct sk_buff *skb) +{ + skb = veth_xdp_rcv_skb(dev, skb); + if (!skb) + return NET_RX_DROP; + + return netif_rx(skb); +} + +static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb) +{ + return __dev_forward_skb(dev, skb) ?: veth_xdp_rx(dev, skb); +} + static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); @@ -111,7 +137,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; } - if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { + if (likely(veth_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); u64_stats_update_begin(&stats->syncp); @@ -126,10 +152,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -/* - * general routines - */ - static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); @@ -179,19 +201,152 @@ static void veth_set_multicast_list(struct net_device *dev) { } +static struct sk_buff *veth_build_skb(void *head, int headroom, int len, + int buflen) +{ + struct sk_buff *skb; + + if (!buflen) { + buflen = SKB_DATA_ALIGN(headroom + len) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + } + skb = build_skb(head, buflen); + if (!skb) + return NULL; + + skb_reserve(skb, headroom); + skb_put(skb, len); + + return skb; +} + +static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev, + struct sk_buff *skb) +{ + struct veth_priv *priv = netdev_priv(dev); + u32 pktlen, headroom, act, metalen; + int size, mac_len, delta, off; + struct bpf_prog *xdp_prog; + struct xdp_buff xdp; + void *orig_data; + + rcu_read_lock(); + xdp_prog = rcu_dereference(priv->xdp_prog); + if (!xdp_prog) { + rcu_read_unlock(); + goto out; + } + + mac_len = skb->data - skb_mac_header(skb); + pktlen = skb->len + mac_len; + size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM 
+ pktlen) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + if (size > PAGE_SIZE) + goto drop; + + headroom = skb_headroom(skb) - mac_len; + if (skb_shared(skb) || skb_head_is_locked(skb) || + skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) { + struct sk_buff *nskb; + void *head, *start; + struct page *page; + int head_off; + + page = alloc_page(GFP_ATOMIC); + if (!page) + goto drop; + + head = page_address(page); + start = head + VETH_XDP_HEADROOM; + if (skb_copy_bits(skb, -mac_len, start, pktlen)) { + page_frag_free(head); + goto drop; + } + + nskb = veth_build_skb(head, + VETH_XDP_HEADROOM + mac_len, skb->len, + PAGE_SIZE); + if (!nskb) { + page_frag_free(head); + goto drop; + } + + skb_copy_header(nskb, skb); + head_off = skb_headroom(nskb) - skb_headroom(skb); + skb_headers_offset_update(nskb, head_off); + dev_consume_skb_any(skb); + skb = nskb; + } + + xdp.data_hard_start = skb->head; + xdp.data = skb_mac_header(skb); + xdp.data_end = xdp.data + pktlen; + xdp.data_meta = xdp.data; + xdp.rxq = &priv->xdp_rxq; + orig_data = xdp.data; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + switch (act) { + case XDP_PASS: + break; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + trace_xdp_exception(dev, xdp_prog, act); + case XDP_DROP: + goto drop; + } + rcu_read_unlock(); + + delta = orig_data - xdp.data; + off = mac_len + delta; + if (off > 0) + __skb_push(skb, off); + else if (off < 0) + __skb_pull(skb, -off); + skb->mac_header -= delta; + skb->protocol = eth_type_trans(skb, dev); + + metalen = xdp.data - xdp.data_meta; + if (metalen) + skb_metadata_set(skb, metalen); +out: + return skb; +drop: + rcu_read_unlock(); + dev_kfree_skb_any(skb); + return NULL; +} + static int veth_open(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); struct net_device *peer = rtnl_dereference(priv->peer); + int err; if (!peer) return -ENOTCONN; + err = xdp_rxq_info_reg(&priv->xdp_rxq, dev, 0); + if (err < 0) + return err; + + err = 
xdp_rxq_info_reg_mem_model(&priv->xdp_rxq, + MEM_TYPE_PAGE_SHARED, NULL); + if (err < 0) + goto err_reg_mem; + if (peer->flags & IFF_UP) { netif_carrier_on(dev); netif_carrier_on(peer); } + return 0; +err_reg_mem: + xdp_rxq_info_unreg(&priv->xdp_rxq); + + return err; } static int veth_close(struct net_device *dev) @@ -203,6 +358,8 @@ static int veth_close(struct net_device *dev) if (peer) netif_carrier_off(peer); + xdp_rxq_info_unreg(&priv->xdp_rxq); + return 0; } @@ -276,6 +433,48 @@ static void veth_set_rx_headroom(struct net_device *dev, int new_hr) rcu_read_unlock(); } +static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct veth_priv *priv = netdev_priv(dev); + struct bpf_prog *old_prog; + + old_prog = rtnl_dereference(priv->xdp_prog); + + rcu_assign_pointer(priv->xdp_prog, prog); + + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static u32 veth_xdp_query(struct net_device *dev) +{ + struct veth_priv *priv = netdev_priv(dev); + const struct bpf_prog *xdp_prog; + + xdp_prog = rtnl_dereference(priv->xdp_prog); + if (xdp_prog) + return xdp_prog->aux->id; + + return 0; +} + +static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return veth_xdp_set(dev, xdp->prog, xdp->extack); + case XDP_QUERY_PROG: + xdp->prog_id = veth_xdp_query(dev); + xdp->prog_attached = !!xdp->prog_id; + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops veth_netdev_ops = { .ndo_init = veth_dev_init, .ndo_open = veth_open, @@ -290,6 +489,7 @@ static const struct net_device_ops veth_netdev_ops = { .ndo_get_iflink = veth_get_iflink, .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = veth_set_rx_headroom, + .ndo_bpf = veth_xdp, }; #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ From patchwork Tue Apr 24 14:39:17 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 
Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Toshiaki Makita X-Patchwork-Id: 903537 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=gmail.com header.i=@gmail.com header.b="hDy3dOc6"; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40VmFh3cR5z9ry1 for ; Wed, 25 Apr 2018 00:40:08 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754592AbeDXOj5 (ORCPT ); Tue, 24 Apr 2018 10:39:57 -0400 Received: from mail-pf0-f195.google.com ([209.85.192.195]:35331 "EHLO mail-pf0-f195.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752765AbeDXOjq (ORCPT ); Tue, 24 Apr 2018 10:39:46 -0400 Received: by mail-pf0-f195.google.com with SMTP id j5so12435612pfh.2 for ; Tue, 24 Apr 2018 07:39:46 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=NgGLznUpGO7DKLf0fz6l78MvWABSL/omROv2uybaO9I=; b=hDy3dOc6TfXK2P+KpdU8STRb5SNeOBIRVrwGaofN7KjISvXLFitNY8x116LbUko/WD VsQI+T+FZXr7AalmrazsFRHApmV9g9L854SbgADHvXm9csEWoIzg3XQoLpCwyl8XsdVi S3gftl0Hc1sMaZFNLBcBdpFvpzRQBJbaXVbMTUSmJuyYWJTsUxK5u+SdHxibalfT+bOD ORtf9zf01bHB1c9CaWRul71cng8lpmeK4WN67XQqYsRD+F0jVIe87GgFyl/gtmY92Xtc lcBywPulUaOKKD3d4UnPwXXPN/dQaw8jzkMA9eEFJ0MHezrmi4QtdSOoIEgwDe/BMjxW LjzA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; 
h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=NgGLznUpGO7DKLf0fz6l78MvWABSL/omROv2uybaO9I=; b=epQ86Dzqsc6VJIstD6w8sk8OYzWZsW020R/bziOXhBjABWXChImoi3Yxu4HU+933z5 uHGtvRkmGCZGjJJf+6/QZCPdJK2TOrhmhyVwi3igunHcktIg+3/J2Gxsafd3kDZOLZ75 5pwKd/fAlhW3SXsTGEwDnpvyRauCEobQiQVcigZbc0BGTz50XfYs82sAK/sdF96bwvOk qA7ifR2yHBd6v82J+azncs2LNmEDvTyPOzNQ8yOWLfrxpiFIAYNmveLMPhc2Pzr7KxSS Z6b21SHgKYa7+RNq6XUFn/IL0SdniIsOzq6XdlIeyQWdruPyQ+3kAhNcScEu96VGv2zH wThA== X-Gm-Message-State: ALQs6tDEVqtF1Mk6+Owsxv3og6UEuh1zfvz6E4JoHbR1+efIko+rrapz dN+uVzgC3uhmVj7KpNixGOg72uFiFdg= X-Google-Smtp-Source: AIpwx4/FBwc/CLca+NPKr5e2/1uaWtHLKloGLc3TAvLqSRfB33TPW2O/R8Nzd6LKAlZ2SFLmWLkoZw== X-Received: by 10.101.76.6 with SMTP id u6mr20225603pgq.388.1524580785522; Tue, 24 Apr 2018 07:39:45 -0700 (PDT) Received: from localhost.localdomain (i121-115-166-6.s42.a013.ap.plala.or.jp. [121.115.166.6]) by smtp.gmail.com with ESMTPSA id o64sm28179970pfb.62.2018.04.24.07.39.44 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Tue, 24 Apr 2018 07:39:45 -0700 (PDT) From: Toshiaki Makita To: netdev@vger.kernel.org Cc: Toshiaki Makita Subject: [PATCH RFC 3/9] veth: Avoid drops by oversized packets when XDP is enabled Date: Tue, 24 Apr 2018 23:39:17 +0900 Message-Id: <20180424143923.26519-4-toshiaki.makita1@gmail.com> X-Mailer: git-send-email 2.14.3 In-Reply-To: <20180424143923.26519-1-toshiaki.makita1@gmail.com> References: <20180424143923.26519-1-toshiaki.makita1@gmail.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Toshiaki Makita All oversized packets, including GSO packets, are dropped if XDP is enabled on the receiver side, so don't send such packets from the peer. Drop the TSO and SCTP fragmentation features so that veth devices segment packets themselves when XDP is enabled on the receiving side. Also cap the MTU accordingly. 
Signed-off-by: Toshiaki Makita --- drivers/net/veth.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 9c4197306716..7271d9582b4a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -410,6 +410,23 @@ static int veth_get_iflink(const struct net_device *dev) return iflink; } +static netdev_features_t veth_fix_features(struct net_device *dev, + netdev_features_t features) +{ + struct veth_priv *priv = netdev_priv(dev); + struct net_device *peer; + + peer = rtnl_dereference(priv->peer); + if (peer) { + struct veth_priv *peer_priv = netdev_priv(peer); + + if (rtnl_dereference(peer_priv->xdp_prog)) + features &= ~NETIF_F_GSO_SOFTWARE; + } + + return features; +} + static void veth_set_rx_headroom(struct net_device *dev, int new_hr) { struct veth_priv *peer_priv, *priv = netdev_priv(dev); @@ -438,13 +455,32 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, { struct veth_priv *priv = netdev_priv(dev); struct bpf_prog *old_prog; + struct net_device *peer; old_prog = rtnl_dereference(priv->xdp_prog); + peer = rtnl_dereference(priv->peer); + + if (!old_prog && prog && peer) { + peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; + peer->max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM - + peer->hard_header_len - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + if (peer->mtu > peer->max_mtu) + dev_set_mtu(peer, peer->max_mtu); + } rcu_assign_pointer(priv->xdp_prog, prog); - if (old_prog) + if (old_prog) { bpf_prog_put(old_prog); + if (!prog && peer) { + peer->hw_features |= NETIF_F_GSO_SOFTWARE; + peer->max_mtu = ETH_MAX_MTU; + } + } + + if ((!!old_prog ^ !!prog) && peer) + netdev_update_features(peer); return 0; } @@ -487,6 +523,7 @@ static const struct net_device_ops veth_netdev_ops = { .ndo_poll_controller = veth_poll_controller, #endif .ndo_get_iflink = veth_get_iflink, + .ndo_fix_features = veth_fix_features, .ndo_features_check = 
passthru_features_check, .ndo_set_rx_headroom = veth_set_rx_headroom, .ndo_bpf = veth_xdp, From patchwork Tue Apr 24 14:39:18 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Toshiaki Makita X-Patchwork-Id: 903542 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=gmail.com header.i=@gmail.com header.b="ALg0pEFa"; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40VmGy3NTsz9ry1 for ; Wed, 25 Apr 2018 00:41:14 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752900AbeDXOlL (ORCPT ); Tue, 24 Apr 2018 10:41:11 -0400 Received: from mail-pg0-f68.google.com ([74.125.83.68]:36228 "EHLO mail-pg0-f68.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753844AbeDXOjr (ORCPT ); Tue, 24 Apr 2018 10:39:47 -0400 Received: by mail-pg0-f68.google.com with SMTP id i6so11038502pgv.3 for ; Tue, 24 Apr 2018 07:39:47 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=qTY/srRpjWKavbpljOcvztqq/6gJM1JX9RPleoql+Ug=; b=ALg0pEFa59JF0p2DJkcxW+TvJfCfxKG6HxQuixkECEtmFEIJACt0//rQYspM6SYJk7 eouPx5xs0ugCrngBj683Ye7Kb1RWQfOFzOyBKLi3q6E5dcgYnwJhSGdKkXDIiqd/VENp v/OUdamUpcpUiWsj/uWt3SlbqxUqEQuvCQEE+fMQAkK3eAhNRieNYS2GTzwJfLeARDTW KeTKvFu9rp/RV8opklzemXxhSYYZmKrhmGg7CU6aREwpq588OXa+3cdmHHk2lCaHwkj8 
iZ60WXbVJgl797wFlPDQEv/WEdJrYWetVfzG8tiIZDy/CoWqhMYhJtajhAwxBx30Hhhf 1qJw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=qTY/srRpjWKavbpljOcvztqq/6gJM1JX9RPleoql+Ug=; b=ITV/d1p1WC8RQ7REluvY7/93fWsKE/glT9hnJag0d43fFshaT4RYvyNZZ9VxaboeE2 F8G9wqiIebeMnDrAnwgVKAQsd3o/FNqwumKYcVEPNkfEvyY78S9eFKxx0mR/HxLfLc7C 0+LKqnCEKmkZyxrpnKzcGmnrr2dEuzEkDFqZXE+SId/1/f8fl0OIxbo2VJSxvjauVJUi 8LjbHO78/BpXP4RuL4J1tJRROsg7QxRaW6x39zgII+5OVH797dWz+ouo0Dl7cO3zFoZr wZrjZXgkOUwbxfEcK3PR6xqiz3yZYdy0totJxpEOkyrrm+2mkRosrn1uBuRTEkWpyWcC Ckow== X-Gm-Message-State: ALQs6tByqShGbyL0Gvkh8u2ch1PP/BkP1Z4wS90JxUqSl8nvUSesbn6d xQgzn21irQmgqu2Mw038yKc0mYKt2nM= X-Google-Smtp-Source: AIpwx49yIaoS/Dts2qUTFNIOUnJnYC0yXWz0zH8MDONY60vEoVZTrZu+fyQBDVPItfQ+fRqJ3ePcgw== X-Received: by 10.99.121.206 with SMTP id u197mr12190857pgc.242.1524580787051; Tue, 24 Apr 2018 07:39:47 -0700 (PDT) Received: from localhost.localdomain (i121-115-166-6.s42.a013.ap.plala.or.jp. [121.115.166.6]) by smtp.gmail.com with ESMTPSA id o64sm28179970pfb.62.2018.04.24.07.39.45 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Tue, 24 Apr 2018 07:39:46 -0700 (PDT) From: Toshiaki Makita To: netdev@vger.kernel.org Cc: Toshiaki Makita Subject: [PATCH RFC 4/9] veth: Use NAPI for XDP Date: Tue, 24 Apr 2018 23:39:18 +0900 Message-Id: <20180424143923.26519-5-toshiaki.makita1@gmail.com> X-Mailer: git-send-email 2.14.3 In-Reply-To: <20180424143923.26519-1-toshiaki.makita1@gmail.com> References: <20180424143923.26519-1-toshiaki.makita1@gmail.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Toshiaki Makita In order to avoid stack inflation caused by recursive XDP program calls from ndo_xdp_xmit, this change introduces NAPI in veth. Add veth's own NAPI handler when XDP is enabled. Use a ptr_ring to emulate a NIC ring. 
Tx function enqueues packets to the ring and peer NAPI handler drains the ring. This way also makes REDIRECT bulk interface simple. When ndo_xdp_xmit is implemented later, ndo_xdp_flush schedules NAPI of the peer veth device and NAPI handles xdp frames enqueued by previous ndo_xdp_xmit, which is quite similar to physical NIC tx function using DMA ring descriptors and mmio door bell. Currently only one ring is allocated for each veth device, so it does not scale on multiqueue env. This can be resolved in the future by allocating rings on per-queue basis. Note that NAPI is not used but netif_rx is used when XDP is not loaded, so this does not change the default behaviour. Signed-off-by: Toshiaki Makita --- drivers/net/veth.c | 197 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 164 insertions(+), 33 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 7271d9582b4a..452771f31c30 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -21,11 +21,13 @@ #include #include #include +#include #include #define DRV_NAME "veth" #define DRV_VERSION "1.0" +#define VETH_RING_SIZE 256 #define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) struct pcpu_vstats { @@ -35,10 +37,14 @@ struct pcpu_vstats { }; struct veth_priv { + struct napi_struct xdp_napi; + struct net_device *dev; struct bpf_prog __rcu *xdp_prog; struct net_device __rcu *peer; atomic64_t dropped; unsigned requested_headroom; + bool rx_notify_masked; + struct ptr_ring xdp_ring; struct xdp_rxq_info xdp_rxq; }; @@ -107,28 +113,56 @@ static const struct ethtool_ops veth_ethtool_ops = { /* general routines */ -static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev, - struct sk_buff *skb); +static void veth_ptr_free(void *ptr) +{ + if (!ptr) + return; + dev_kfree_skb_any(ptr); +} -static int veth_xdp_rx(struct net_device *dev, struct sk_buff *skb) +static void veth_xdp_flush(struct veth_priv *priv) { - skb = veth_xdp_rcv_skb(dev, skb); - if (!skb) + /* Write 
ptr_ring before reading rx_notify_masked */ + smp_mb(); + if (!priv->rx_notify_masked) { + priv->rx_notify_masked = true; + napi_schedule(&priv->xdp_napi); + } +} + +static int veth_xdp_enqueue(struct veth_priv *priv, void *ptr) +{ + if (unlikely(ptr_ring_produce(&priv->xdp_ring, ptr))) + return -ENOSPC; + + return 0; +} + +static int veth_xdp_rx(struct veth_priv *priv, struct sk_buff *skb) +{ + if (unlikely(veth_xdp_enqueue(priv, skb))) { + dev_kfree_skb_any(skb); return NET_RX_DROP; + } - return netif_rx(skb); + return NET_RX_SUCCESS; } -static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb) +static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, bool xdp) { - return __dev_forward_skb(dev, skb) ?: veth_xdp_rx(dev, skb); + struct veth_priv *priv = netdev_priv(dev); + + return __dev_forward_skb(dev, skb) ?: xdp ? + veth_xdp_rx(priv, skb) : + netif_rx(skb); } static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) { - struct veth_priv *priv = netdev_priv(dev); + struct veth_priv *rcv_priv, *priv = netdev_priv(dev); struct net_device *rcv; int length = skb->len; + bool rcv_xdp = false; rcu_read_lock(); rcv = rcu_dereference(priv->peer); @@ -137,7 +171,10 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; } - if (likely(veth_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { + rcv_priv = netdev_priv(rcv); + rcv_xdp = rcu_access_pointer(rcv_priv->xdp_prog); + + if (likely(veth_forward_skb(rcv, skb, rcv_xdp) == NET_RX_SUCCESS)) { struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); u64_stats_update_begin(&stats->syncp); @@ -148,7 +185,13 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) drop: atomic64_inc(&priv->dropped); } + + /* TODO: check xmit_more and tx_stopped */ + if (rcv_xdp) + veth_xdp_flush(rcv_priv); + rcu_read_unlock(); + return NETDEV_TX_OK; } @@ -220,10 +263,9 @@ static struct sk_buff *veth_build_skb(void *head, int headroom, int len, return 
skb; } -static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev, +static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv, struct sk_buff *skb) { - struct veth_priv *priv = netdev_priv(dev); u32 pktlen, headroom, act, metalen; int size, mac_len, delta, off; struct bpf_prog *xdp_prog; @@ -293,7 +335,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev, default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: - trace_xdp_exception(dev, xdp_prog, act); + trace_xdp_exception(priv->dev, xdp_prog, act); case XDP_DROP: goto drop; } @@ -306,7 +348,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev, else if (off < 0) __skb_pull(skb, -off); skb->mac_header -= delta; - skb->protocol = eth_type_trans(skb, dev); + skb->protocol = eth_type_trans(skb, priv->dev); metalen = xdp.data - xdp.data_meta; if (metalen) @@ -319,6 +361,72 @@ static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev, return NULL; } +static int veth_xdp_rcv(struct veth_priv *priv, int budget) +{ + int i, done = 0; + + for (i = 0; i < budget; i++) { + void *ptr = ptr_ring_consume(&priv->xdp_ring); + struct sk_buff *skb; + + if (!ptr) + break; + + skb = veth_xdp_rcv_skb(priv, ptr); + + if (skb) + napi_gro_receive(&priv->xdp_napi, skb); + + done++; + } + + return done; +} + +static int veth_poll(struct napi_struct *napi, int budget) +{ + struct veth_priv *priv = + container_of(napi, struct veth_priv, xdp_napi); + int done; + + done = veth_xdp_rcv(priv, budget); + + if (done < budget && napi_complete_done(napi, done)) { + /* Write rx_notify_masked before reading ptr_ring */ + smp_store_mb(priv->rx_notify_masked, false); + if (unlikely(!ptr_ring_empty(&priv->xdp_ring))) { + priv->rx_notify_masked = true; + napi_schedule(&priv->xdp_napi); + } + } + + return done; +} + +static int veth_napi_add(struct net_device *dev) +{ + struct veth_priv *priv = netdev_priv(dev); + int err; + + err = ptr_ring_init(&priv->xdp_ring, VETH_RING_SIZE, GFP_KERNEL); + if (err) + 
return err; + + netif_napi_add(dev, &priv->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); + napi_enable(&priv->xdp_napi); + + return 0; +} + +static void veth_napi_del(struct net_device *dev) +{ + struct veth_priv *priv = netdev_priv(dev); + + napi_disable(&priv->xdp_napi); + netif_napi_del(&priv->xdp_napi); + ptr_ring_cleanup(&priv->xdp_ring, veth_ptr_free); +} + static int veth_open(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); @@ -337,6 +445,12 @@ static int veth_open(struct net_device *dev) if (err < 0) goto err_reg_mem; + if (rtnl_dereference(priv->xdp_prog)) { + err = veth_napi_add(dev); + if (err) + goto err_reg_mem; + } + if (peer->flags & IFF_UP) { netif_carrier_on(dev); netif_carrier_on(peer); @@ -358,6 +472,9 @@ static int veth_close(struct net_device *dev) if (peer) netif_carrier_off(peer); + if (rtnl_dereference(priv->xdp_prog)) + veth_napi_del(dev); + xdp_rxq_info_unreg(&priv->xdp_rxq); return 0; @@ -384,15 +501,12 @@ static void veth_dev_free(struct net_device *dev) #ifdef CONFIG_NET_POLL_CONTROLLER static void veth_poll_controller(struct net_device *dev) { - /* veth only receives frames when its peer sends one - * Since it's a synchronous operation, we are guaranteed - * never to have pending data when we poll for it so - * there is nothing to do here. 
- * - * We need this though so netpoll recognizes us as an interface that - * supports polling, which enables bridge devices in virt setups to - * still use netconsole - */ + struct veth_priv *priv = netdev_priv(dev); + + rcu_read_lock(); + if (rcu_access_pointer(priv->xdp_prog)) + veth_xdp_flush(priv); + rcu_read_unlock(); } #endif /* CONFIG_NET_POLL_CONTROLLER */ @@ -456,26 +570,40 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, struct veth_priv *priv = netdev_priv(dev); struct bpf_prog *old_prog; struct net_device *peer; + int err; old_prog = rtnl_dereference(priv->xdp_prog); peer = rtnl_dereference(priv->peer); - if (!old_prog && prog && peer) { - peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; - peer->max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM - - peer->hard_header_len - - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - if (peer->mtu > peer->max_mtu) - dev_set_mtu(peer, peer->max_mtu); + if (!old_prog && prog) { + if (dev->flags & IFF_UP) { + err = veth_napi_add(dev); + if (err) + return err; + } + + if (peer) { + peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; + peer->max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM - + peer->hard_header_len - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + if (peer->mtu > peer->max_mtu) + dev_set_mtu(peer, peer->max_mtu); + } } rcu_assign_pointer(priv->xdp_prog, prog); if (old_prog) { bpf_prog_put(old_prog); - if (!prog && peer) { - peer->hw_features |= NETIF_F_GSO_SOFTWARE; - peer->max_mtu = ETH_MAX_MTU; + if (!prog) { + if (dev->flags & IFF_UP) + veth_napi_del(dev); + + if (peer) { + peer->hw_features |= NETIF_F_GSO_SOFTWARE; + peer->max_mtu = ETH_MAX_MTU; + } } } @@ -688,10 +816,13 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, */ priv = netdev_priv(dev); + priv->dev = dev; rcu_assign_pointer(priv->peer, peer); priv = netdev_priv(peer); + priv->dev = peer; rcu_assign_pointer(priv->peer, dev); + return 0; err_register_dev: From patchwork Tue Apr 24 14:39:19 2018 Content-Type: 
text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Toshiaki Makita X-Patchwork-Id: 903541 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=gmail.com header.i=@gmail.com header.b="gOzFE6al"; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40VmGv4Y50z9ry1 for ; Wed, 25 Apr 2018 00:41:11 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752749AbeDXOlI (ORCPT ); Tue, 24 Apr 2018 10:41:08 -0400 Received: from mail-pg0-f65.google.com ([74.125.83.65]:40390 "EHLO mail-pg0-f65.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754080AbeDXOjt (ORCPT ); Tue, 24 Apr 2018 10:39:49 -0400 Received: by mail-pg0-f65.google.com with SMTP id e9so11026548pgr.7 for ; Tue, 24 Apr 2018 07:39:49 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=Nddq7PL2Ewq21g7hrQdsAx48Qo0uTMzk8Kl8YjGJTNU=; b=gOzFE6alWIoz7+cQzSBOg6Y+TS8KeC8iLQEqoqFlSqhV5hZcyakyRkXlds6RCKXHOM V8Fzy3lM6C9X/Itui7UV9vUqWlWVjED9SDdU299UkVDH8PDiPeL15J7X+xf8Lj5+zjfb HyK/gMtTQ8eYNFmt/HMOt4OiECpK7Pf6xBflCl/U2ZtwOwmCuWyaoF6wPoy8jKmUqR9N stGdK70Q9TbACJ0B34r3iOAXVMyilLpw3HvZiSVjIHL8nW0Pfwul1jQxbbiRoBeJoodb UPTg/v5GKB+buNLA75olAR65xFdFyRjaN8rIUMky/JEUyxP2e6h2x9OPa+JXumQm36FW jJtQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; 
h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=Nddq7PL2Ewq21g7hrQdsAx48Qo0uTMzk8Kl8YjGJTNU=; b=JaZzT5yJHYiBFO2Yf+9bVg6ytcA+ZSmZZz2a6bI5ieDSxnYRvz+0dALY5LVtC2iqSq KG3nnIpFPanh8pLsPahJ1KgeRBFd+jzHMGSA1/EReVR9Xtm7o56uoHZBrAUbBcTHFXuX kK6BOKuJrou6pyzEw47MGdZC4lOhnWh98NRNr4DugnDw1rbvkvoIkZ+s/2Vqv3fiS3L/ uxUS/napKPMZ9DZNJHUkCtZjsbWTu3VS4xo8M+Sv+Tu3EDxv3FO96SR7IRmHbSuEACgh GuRPdCYFL5jBlurB20mdua4j1PWRX+P9nPjTKUCdFgRph6vmXi6TYo/wgGXjJD5uFjNs XRXA== X-Gm-Message-State: ALQs6tDiGDkdhT+8rzgewOdLticcT64hkgI1oNpF7anxKF3KoFPf0YNF +yvbGcFUX85KsqDJ/TDx6rv/3ksltJw= X-Google-Smtp-Source: AIpwx4+K/sigS3NGKb1ZWSy3AdDAlWSag50NygiFi/r/zDrzwnYR/ydyQ4bXYO43DXW2Xsk2M+XDUQ== X-Received: by 10.98.102.79 with SMTP id a76mr24097211pfc.162.1524580788953; Tue, 24 Apr 2018 07:39:48 -0700 (PDT) Received: from localhost.localdomain (i121-115-166-6.s42.a013.ap.plala.or.jp. [121.115.166.6]) by smtp.gmail.com with ESMTPSA id o64sm28179970pfb.62.2018.04.24.07.39.47 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Tue, 24 Apr 2018 07:39:48 -0700 (PDT) From: Toshiaki Makita To: netdev@vger.kernel.org Cc: Toshiaki Makita Subject: [PATCH RFC 5/9] veth: Handle xdp_frame in xdp napi ring Date: Tue, 24 Apr 2018 23:39:19 +0900 Message-Id: <20180424143923.26519-6-toshiaki.makita1@gmail.com> X-Mailer: git-send-email 2.14.3 In-Reply-To: <20180424143923.26519-1-toshiaki.makita1@gmail.com> References: <20180424143923.26519-1-toshiaki.makita1@gmail.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Toshiaki Makita This is preparation for XDP TX and ndo_xdp_xmit. Now the napi ring accepts both skb and xdp_frame. When xdp_frame is enqueued, skb will not be allocated until XDP program on veth returns PASS. This will speedup the XDP processing when ndo_xdp_xmit is implemented and xdp_frame is enqueued by the peer device. 
Signed-off-by: Toshiaki Makita --- drivers/net/veth.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 452771f31c30..89c91c1c9935 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -27,6 +27,7 @@ #define DRV_NAME "veth" #define DRV_VERSION "1.0" +#define VETH_XDP_FLAG 0x1UL #define VETH_RING_SIZE 256 #define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) @@ -48,6 +49,16 @@ struct veth_priv { struct xdp_rxq_info xdp_rxq; }; +static bool veth_is_xdp_frame(void *ptr) +{ + return (unsigned long)ptr & VETH_XDP_FLAG; +} + +static void *veth_ptr_to_xdp(void *ptr) +{ + return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG); +} + /* * ethtool interface */ @@ -117,7 +128,14 @@ static void veth_ptr_free(void *ptr) { if (!ptr) return; - dev_kfree_skb_any(ptr); + + if (veth_is_xdp_frame(ptr)) { + struct xdp_frame *frame = veth_ptr_to_xdp(ptr); + + xdp_return_frame(frame); + } else { + dev_kfree_skb_any(ptr); + } } static void veth_xdp_flush(struct veth_priv *priv) @@ -263,6 +281,60 @@ static struct sk_buff *veth_build_skb(void *head, int headroom, int len, return skb; } +static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv, + struct xdp_frame *frame) +{ + struct bpf_prog *xdp_prog; + unsigned int headroom; + struct sk_buff *skb; + int len, delta = 0; + + rcu_read_lock(); + xdp_prog = rcu_dereference(priv->xdp_prog); + if (xdp_prog) { + struct xdp_buff xdp; + u32 act; + + xdp.data_hard_start = frame->data - frame->headroom; + xdp.data = frame->data; + xdp.data_end = frame->data + frame->len; + xdp.data_meta = frame->data - frame->metasize; + xdp.rxq = &priv->xdp_rxq; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + switch (act) { + case XDP_PASS: + delta = frame->data - xdp.data; + break; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + trace_xdp_exception(priv->dev, xdp_prog, act); + case XDP_DROP: + goto err_xdp; + } 
+ } + rcu_read_unlock(); + + headroom = frame->data - delta - (void *)frame; + len = frame->len + delta; + skb = veth_build_skb(frame, headroom, len, 0); + if (!skb) { + xdp_return_frame(frame); + goto err; + } + + skb->protocol = eth_type_trans(skb, priv->dev); +err: + return skb; +err_xdp: + rcu_read_unlock(); + xdp_return_frame(frame); + + return NULL; +} + static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv, struct sk_buff *skb) { @@ -372,7 +444,10 @@ static int veth_xdp_rcv(struct veth_priv *priv, int budget) if (!ptr) break; - skb = veth_xdp_rcv_skb(priv, ptr); + if (veth_is_xdp_frame(ptr)) + skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr)); + else + skb = veth_xdp_rcv_skb(priv, ptr); if (skb) napi_gro_receive(&priv->xdp_napi, skb); From patchwork Tue Apr 24 14:39:20 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Toshiaki Makita X-Patchwork-Id: 903536 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=gmail.com header.i=@gmail.com header.b="tstMXnua"; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40VmFZ2Ct6z9ry1 for ; Wed, 25 Apr 2018 00:40:02 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754687AbeDXOj7 (ORCPT ); Tue, 24 Apr 2018 10:39:59 -0400 Received: from mail-pf0-f196.google.com ([209.85.192.196]:44720 "EHLO mail-pf0-f196.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id 
S1753690AbeDXOjv (ORCPT ); Tue, 24 Apr 2018 10:39:51 -0400 Received: by mail-pf0-f196.google.com with SMTP id p15so12427739pff.11 for ; Tue, 24 Apr 2018 07:39:51 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=dhREnwsaVn0clRmdubEXNc3h0abtuhaj0YGcW8OXtUY=; b=tstMXnuaa9yUfrQ+kaAzKZ0Nguc8aOjtKdPCjOOtoxWWy7RM6nvlmAK8GgZmgfEkrf Xpo2mqQp3XcTPFDDG9ZB7ijwwLM65fYjGRBT9v+L6cR0Qrbm3OSiZiBcItmWwpCMY4Ou F3pL0285xEm8pTlNvnbaviq8upA0X9F/LMySn5rBLn3dGv1SkUP9ad2ZsTSa1n4G6tSN NenBUg230xDj/d9vCJetKahMuJ/OY1FrtwJOdbD2HT3khqYpRv+xY3a4wfoX4b1dn4Nf K2wufOYg7E580fbphHiIyqKkpa9RKYUPrzF3Q7h6KBOga2MuabFO4WXuGTU6syVVQc9g XaTA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=dhREnwsaVn0clRmdubEXNc3h0abtuhaj0YGcW8OXtUY=; b=nlq9p6ls+kODQBjHPQd0roBZRfglN55Zju1ipu19Zj04CRBk+0vmAjE+E0+9JsaTUl T0MBnVmd6WzOIMl9qgoQFJuEAJqRZkzArXIWrfno6Cu5z4r90soYRuo1Pe9DVI6Jbh4I 1Vfin89ui2khoNWeORm6jcy7YdVbGCnNtK9f3vf5XUPQvXoL7dQuMaSGWjScbAYDaAkp bZaK53P0kcZfTTZ3PLI6xnt6TUMYSiOigYqykO5w48c/MWlTAWb1l3dim7Aiu54nytqe OK0gBYkv0c3QgCsOT6m9h5LCMsD/2g/+hgCU+bHzks+6iZhEoIgtszkTN/wwvuMcaXjN Mr2Q== X-Gm-Message-State: ALQs6tA0DbZ12aq/yqgVOoFI6rwYm8syuw5wATSSmXJNq7+spgEcW03F zPti2NgVoovWqMyraEq7DMQy8l0CkqE= X-Google-Smtp-Source: AIpwx4/ynJ8MWzbaTKJdM7Xhf5STjfQB3lX3PqtAL7b2W47GGmUrQod/0nwdEAV+diHdEoMSlGw5Dg== X-Received: by 10.98.79.12 with SMTP id d12mr12417364pfb.220.1524580790630; Tue, 24 Apr 2018 07:39:50 -0700 (PDT) Received: from localhost.localdomain (i121-115-166-6.s42.a013.ap.plala.or.jp. 
[121.115.166.6]) by smtp.gmail.com with ESMTPSA id o64sm28179970pfb.62.2018.04.24.07.39.49 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Tue, 24 Apr 2018 07:39:50 -0700 (PDT) From: Toshiaki Makita To: netdev@vger.kernel.org Cc: Toshiaki Makita Subject: [PATCH RFC 6/9] veth: Add ndo_xdp_xmit Date: Tue, 24 Apr 2018 23:39:20 +0900 Message-Id: <20180424143923.26519-7-toshiaki.makita1@gmail.com> X-Mailer: git-send-email 2.14.3 In-Reply-To: <20180424143923.26519-1-toshiaki.makita1@gmail.com> References: <20180424143923.26519-1-toshiaki.makita1@gmail.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Toshiaki Makita This allows a NIC's XDP program to redirect packets to veth. The destination veth device enqueues redirected packets to the napi ring of its peer, and they are then processed by XDP on the peer veth device. This can be thought of as one XDP program calling another XDP program using REDIRECT, when the peer enables driver XDP. Note that whether an XDP program is loaded on the redirect target veth device does not affect how xdp_frames sent by ndo_xdp_xmit are handled, since the ring sits on the rx (peer) side. Instead, whether an XDP program is loaded on the peer veth does. When the peer veth device has driver XDP, ndo_xdp_xmit forwards xdp_frames to its peer without modification. If not, ndo_xdp_xmit converts xdp_frames to skbs on the sender side and invokes netif_rx rather than dropping them. Although this will not result in good performance, I think dropping redirected packets when XDP is not loaded on the peer device is too restrictive, so I added this fallback.
Signed-off-by: Toshiaki Makita --- drivers/net/veth.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++--- include/linux/filter.h | 16 +++++++++++ net/core/filter.c | 11 +------- 3 files changed, 87 insertions(+), 13 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 89c91c1c9935..b1d591be0eba 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -54,6 +54,11 @@ static bool veth_is_xdp_frame(void *ptr) return (unsigned long)ptr & VETH_XDP_FLAG; } +static void *veth_xdp_to_ptr(void *ptr) +{ + return (void *)((unsigned long)ptr | VETH_XDP_FLAG); +} + static void *veth_ptr_to_xdp(void *ptr) { return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG); @@ -138,7 +143,7 @@ static void veth_ptr_free(void *ptr) } } -static void veth_xdp_flush(struct veth_priv *priv) +static void __veth_xdp_flush(struct veth_priv *priv) { /* Write ptr_ring before reading rx_notify_masked */ smp_mb(); @@ -206,7 +211,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) /* TODO: check xmit_more and tx_stopped */ if (rcv_xdp) - veth_xdp_flush(rcv_priv); + __veth_xdp_flush(rcv_priv); rcu_read_unlock(); @@ -281,6 +286,66 @@ static struct sk_buff *veth_build_skb(void *head, int headroom, int len, return skb; } +static int veth_xdp_xmit(struct net_device *dev, struct xdp_frame *frame) +{ + struct veth_priv *rcv_priv, *priv = netdev_priv(dev); + int headroom = frame->data - (void *)frame; + struct net_device *rcv; + int err = 0; + + rcv = rcu_dereference(priv->peer); + if (unlikely(!rcv)) + return -ENXIO; + + rcv_priv = netdev_priv(rcv); + /* xdp_ring is initialized on receive side? 
*/ + if (rcu_access_pointer(rcv_priv->xdp_prog)) { + err = xdp_ok_fwd_dev(rcv, frame->len); + if (unlikely(err)) + return err; + + err = veth_xdp_enqueue(rcv_priv, veth_xdp_to_ptr(frame)); + } else { + struct sk_buff *skb; + + skb = veth_build_skb(frame, headroom, frame->len, 0); + if (unlikely(!skb)) + return -ENOMEM; + + /* Get page ref in case skb is dropped in netif_rx. + * The caller is responsible for freeing the page on error. + */ + get_page(virt_to_page(frame->data)); + if (unlikely(veth_forward_skb(rcv, skb, false) != NET_RX_SUCCESS)) + return -ENXIO; + + /* Put page ref on success */ + page_frag_free(frame->data); + } + + return err; +} + +static void veth_xdp_flush(struct net_device *dev) +{ + struct veth_priv *rcv_priv, *priv = netdev_priv(dev); + struct net_device *rcv; + + rcu_read_lock(); + rcv = rcu_dereference(priv->peer); + if (unlikely(!rcv)) + goto out; + + rcv_priv = netdev_priv(rcv); + /* xdp_ring is initialized on receive side? */ + if (unlikely(!rcu_access_pointer(rcv_priv->xdp_prog))) + goto out; + + __veth_xdp_flush(rcv_priv); +out: + rcu_read_unlock(); +} + static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv, struct xdp_frame *frame) { @@ -580,7 +645,7 @@ static void veth_poll_controller(struct net_device *dev) rcu_read_lock(); if (rcu_access_pointer(priv->xdp_prog)) - veth_xdp_flush(priv); + __veth_xdp_flush(priv); rcu_read_unlock(); } #endif /* CONFIG_NET_POLL_CONTROLLER */ @@ -730,6 +795,8 @@ static const struct net_device_ops veth_netdev_ops = { .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = veth_set_rx_headroom, .ndo_bpf = veth_xdp, + .ndo_xdp_xmit = veth_xdp_xmit, + .ndo_xdp_flush = veth_xdp_flush, }; #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ diff --git a/include/linux/filter.h b/include/linux/filter.h index 4da8b2308174..7d043f51d1d7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ 
-752,6 +753,21 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +static __always_inline int +xdp_ok_fwd_dev(const struct net_device *fwd, unsigned int pktlen) +{ + unsigned int len; + + if (unlikely(!(fwd->flags & IFF_UP))) + return -ENETDOWN; + + len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; + if (pktlen > len) + return -EMSGSIZE; + + return 0; +} + /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the * same cpu context. Further for best results no more than a single map * for the do_redirect/do_flush pair should be used. This limitation is diff --git a/net/core/filter.c b/net/core/filter.c index a374b8560bc4..25ae8ffaa968 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2923,16 +2923,7 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect); static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) { - unsigned int len; - - if (unlikely(!(fwd->flags & IFF_UP))) - return -ENETDOWN; - - len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; - if (skb->len > len) - return -EMSGSIZE; - - return 0; + return xdp_ok_fwd_dev(fwd, skb->len); } static int xdp_do_generic_redirect_map(struct net_device *dev, From patchwork Tue Apr 24 14:39:21 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Toshiaki Makita X-Patchwork-Id: 903540 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=gmail.com 
header.i=@gmail.com header.b="mIbqPy7u"; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40VmGc5fxLz9ry1 for ; Wed, 25 Apr 2018 00:40:56 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752030AbeDXOkz (ORCPT ); Tue, 24 Apr 2018 10:40:55 -0400 Received: from mail-pf0-f194.google.com ([209.85.192.194]:37315 "EHLO mail-pf0-f194.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754211AbeDXOjw (ORCPT ); Tue, 24 Apr 2018 10:39:52 -0400 Received: by mail-pf0-f194.google.com with SMTP id p6so12430570pfn.4 for ; Tue, 24 Apr 2018 07:39:52 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=tI/IvpEyXXbSV5QDdhkHyjS7zmrNv8mEZzyOt28Yw/s=; b=mIbqPy7uAlRL04lclJ0QQ+yP+fuBQx3DGb4aCeu7IJ7+kJqIk4PUu6UhkMUUDJQyz0 Z5wyPMsXDUYiuj+of4LmxxrM9kajAJ8a8o20fe98fYGqiEoVHhQZcRXXdKAfzlQt6IOz A0Y/8VWn8ohtUgxuh+zZCsUgkIAVCGwMXAmxZGpp8YGp5hHvQv3TVFhbL1S/uOugOcht /J39iaRtZ30RflPiL3+fJgmwg6oCXrkf+DjL2iQR6j8PselT3P8JReWME9rNFvFEKDPd i45vK0ScMsqGtcm5e2Au9x9vFwawhnfmyWMWJ1sbOVy4kA3xp3jx+86vjqpgB8577MfS u6HQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=tI/IvpEyXXbSV5QDdhkHyjS7zmrNv8mEZzyOt28Yw/s=; b=CxAPmVKxNdl+SzS//fKLQZXCNWdsWzVtp8Qrh1RF9jE6DDIlHMpgxo0tN6TDZ+3kJK rqL8lgcvKFehac357sC6LoWcppFO6ljWJ7Qju1FasqxebArgd0K0ra22gFpiFl7GSBxC c3UticLbJlJuDkKSKEKunSKMB6xyXFWjEysWp89e55jFjahNViA33BNx+uIgBvRe1//U KYuVlEDHisy4KrLS/HcQX3AopFHXE8411bvOkPYWC+7i6PVTSWmf398cWozXkrF7vVcZ kj2Vrq1KglvWw/OJEZu1xhvtpjfrO92FfDxJoAmitBCzbO0PxF0KN4DZQUMrjxPyKwQN gJtw== X-Gm-Message-State: ALQs6tCZK1tHhbfNpeWPmG30sgkY//pDR4bpfPhzMDgjCpYv/BWYI6TZ iugdarXHG+9qMVSapqLyn7hd+eNj2RM= X-Google-Smtp-Source: 
AIpwx48oWsNbtyQC+cKkr9mb2o8/Z1It9Qo1YaQGwkIfbqdDknPptEImzF/FF11V9MSRPvDtITFt6A== X-Received: by 10.98.76.68 with SMTP id z65mr24387432pfa.181.1524580792128; Tue, 24 Apr 2018 07:39:52 -0700 (PDT) Received: from localhost.localdomain (i121-115-166-6.s42.a013.ap.plala.or.jp. [121.115.166.6]) by smtp.gmail.com with ESMTPSA id o64sm28179970pfb.62.2018.04.24.07.39.50 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Tue, 24 Apr 2018 07:39:51 -0700 (PDT) From: Toshiaki Makita To: netdev@vger.kernel.org Cc: Toshiaki Makita Subject: [PATCH RFC 7/9] veth: Add XDP TX and REDIRECT Date: Tue, 24 Apr 2018 23:39:21 +0900 Message-Id: <20180424143923.26519-8-toshiaki.makita1@gmail.com> X-Mailer: git-send-email 2.14.3 In-Reply-To: <20180424143923.26519-1-toshiaki.makita1@gmail.com> References: <20180424143923.26519-1-toshiaki.makita1@gmail.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Toshiaki Makita This allows further redirection of xdp_frames like NIC -> veth--veth -> veth--veth (XDP) (XDP) (XDP) The intermediate XDP, redirecting packets from NIC to the other veth, reuses xdp_mem info from NIC so that page recycling of the NIC works on the destination veth's XDP. 
Signed-off-by: Toshiaki Makita --- drivers/net/veth.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 85 insertions(+), 9 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index b1d591be0eba..98fc91a64e29 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -43,6 +43,7 @@ struct veth_priv { struct bpf_prog __rcu *xdp_prog; struct net_device __rcu *peer; atomic64_t dropped; + struct xdp_mem_info xdp_mem; unsigned requested_headroom; bool rx_notify_masked; struct ptr_ring xdp_ring; @@ -346,9 +347,21 @@ static void veth_xdp_flush(struct net_device *dev) rcu_read_unlock(); } +static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) +{ + struct xdp_frame *frame = convert_to_xdp_frame(xdp); + + if (unlikely(!frame)) + return -EOVERFLOW; + + return veth_xdp_xmit(dev, frame); +} + static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv, - struct xdp_frame *frame) + struct xdp_frame *frame, bool *xdp_xmit, + bool *xdp_redir) { + struct xdp_frame orig_frame; struct bpf_prog *xdp_prog; unsigned int headroom; struct sk_buff *skb; @@ -372,6 +385,29 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv, case XDP_PASS: delta = frame->data - xdp.data; break; + case XDP_TX: + orig_frame = *frame; + xdp.data_hard_start = frame; + xdp.rxq->mem = frame->mem; + if (unlikely(veth_xdp_tx(priv->dev, &xdp))) { + trace_xdp_exception(priv->dev, xdp_prog, act); + frame = &orig_frame; + goto err_xdp; + } + *xdp_xmit = true; + rcu_read_unlock(); + goto xdp_xmit; + case XDP_REDIRECT: + orig_frame = *frame; + xdp.data_hard_start = frame; + xdp.rxq->mem = frame->mem; + if (xdp_do_redirect(priv->dev, &xdp, xdp_prog)) { + frame = &orig_frame; + goto err_xdp; + } + *xdp_redir = true; + rcu_read_unlock(); + goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: @@ -396,12 +432,13 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv, err_xdp: rcu_read_unlock(); xdp_return_frame(frame); - 
+xdp_xmit: return NULL; } static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv, - struct sk_buff *skb) + struct sk_buff *skb, bool *xdp_xmit, + bool *xdp_redir) { u32 pktlen, headroom, act, metalen; int size, mac_len, delta, off; @@ -469,6 +506,26 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv, switch (act) { case XDP_PASS: break; + case XDP_TX: + get_page(virt_to_page(xdp.data)); + dev_consume_skb_any(skb); + xdp.rxq->mem = priv->xdp_mem; + if (unlikely(veth_xdp_tx(priv->dev, &xdp))) { + trace_xdp_exception(priv->dev, xdp_prog, act); + goto err_xdp; + } + *xdp_xmit = true; + rcu_read_unlock(); + goto xdp_xmit; + case XDP_REDIRECT: + get_page(virt_to_page(xdp.data)); + dev_consume_skb_any(skb); + xdp.rxq->mem = priv->xdp_mem; + if (xdp_do_redirect(priv->dev, &xdp, xdp_prog)) + goto err_xdp; + *xdp_redir = true; + rcu_read_unlock(); + goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: @@ -496,9 +553,15 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv, rcu_read_unlock(); dev_kfree_skb_any(skb); return NULL; +err_xdp: + rcu_read_unlock(); + page_frag_free(xdp.data); +xdp_xmit: + return NULL; } -static int veth_xdp_rcv(struct veth_priv *priv, int budget) +static int veth_xdp_rcv(struct veth_priv *priv, int budget, bool *xdp_xmit, + bool *xdp_redir) { int i, done = 0; @@ -509,10 +572,12 @@ static int veth_xdp_rcv(struct veth_priv *priv, int budget) if (!ptr) break; - if (veth_is_xdp_frame(ptr)) - skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr)); - else - skb = veth_xdp_rcv_skb(priv, ptr); + if (veth_is_xdp_frame(ptr)) { + skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr), + xdp_xmit, xdp_redir); + } else { + skb = veth_xdp_rcv_skb(priv, ptr, xdp_xmit, xdp_redir); + } if (skb) napi_gro_receive(&priv->xdp_napi, skb); @@ -527,9 +592,11 @@ static int veth_poll(struct napi_struct *napi, int budget) { struct veth_priv *priv = container_of(napi, struct veth_priv, xdp_napi); + bool xdp_xmit = false; + bool 
xdp_redir = false; int done; - done = veth_xdp_rcv(priv, budget); + done = veth_xdp_rcv(priv, budget, &xdp_xmit, &xdp_redir); if (done < budget && napi_complete_done(napi, done)) { /* Write rx_notify_masked before reading ptr_ring */ @@ -540,6 +607,11 @@ static int veth_poll(struct napi_struct *napi, int budget) } } + if (xdp_xmit) + veth_xdp_flush(priv->dev); + if (xdp_redir) + xdp_do_flush_map(); + return done; } @@ -585,6 +657,9 @@ static int veth_open(struct net_device *dev) if (err < 0) goto err_reg_mem; + /* Save original mem info as it can be overwritten */ + priv->xdp_mem = priv->xdp_rxq.mem; + if (rtnl_dereference(priv->xdp_prog)) { err = veth_napi_add(dev); if (err) @@ -615,6 +690,7 @@ static int veth_close(struct net_device *dev) if (rtnl_dereference(priv->xdp_prog)) veth_napi_del(dev); + priv->xdp_rxq.mem = priv->xdp_mem; xdp_rxq_info_unreg(&priv->xdp_rxq); return 0; From patchwork Tue Apr 24 14:39:22 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Toshiaki Makita X-Patchwork-Id: 903538 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=gmail.com header.i=@gmail.com header.b="iXETD/TP"; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40VmGP4Nzjz9s02 for ; Wed, 25 Apr 2018 00:40:45 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754505AbeDXOkO (ORCPT ); Tue, 24 Apr 2018 10:40:14 -0400 Received: from 
mail-pg0-f66.google.com ([74.125.83.66]:36236 "EHLO mail-pg0-f66.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754401AbeDXOjy (ORCPT ); Tue, 24 Apr 2018 10:39:54 -0400 Received: by mail-pg0-f66.google.com with SMTP id i6so11038718pgv.3 for ; Tue, 24 Apr 2018 07:39:54 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=phSjkHfCr1vZGLL1OQQbAbkAUiQQEVL6XnBxa987jiE=; b=iXETD/TPbgAWoelSnDwMn1R40Fj6YvCgfG4MxEhdm1/pqHJagUSbgkcoTMb63BnDim KBta50ypkTYTBMlASkO9H8ElAWpzq+LEisOA6/HVNEtdN93TQqb/Vemu+G9xuvPBjdjX RbAH9JgQFKR7GX7vnw10YKouafrxuNW/sB9gNfsCW/PJUGUAlM27CZkVhIkTLim1AmsC IDxnslm5ZME5yWVpAXanAml8XjN3Z7SWHT+t+2i9NywrL5MU0ufvuld4msUHQYPPG4d3 GEkrfkcVbGP9lvdBOTlrD3oudVRoI781VCRfyUQO6DUpWMgf97fFGC/FxDurNd4gfNCJ xfMw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=phSjkHfCr1vZGLL1OQQbAbkAUiQQEVL6XnBxa987jiE=; b=e8gKv8EcHF0rnbGmJAJvKkOFwqigrbGILbGEGYQ0Df+VSiyNxNGuGWnDjY1ktHYOJo rnbzyM74VRptjm5OKFWkc1R8KZgsunbEuv1Jp1TSJvnzF9Qs3tbIPXzyV6rjx4PEByNz XUPH4IAovoE2NRtDL5jw3IHuSK093tHXUNHrHjpfGc3o90ovP/tb2VacUNgeYJzdp/cb 9n2kmBb+Asx6A6iPE6Ma4OX6K7MBQXGIzBMic+YSfsa98SZfdJVTznMNgzPwKVCtmBVN FLWD2QjyNyzIAN+iOhgCV5toQNJNHVWRSSo6KEOpqW+WMN5PN2zEE4lPrsRXOOU619q1 h7sQ== X-Gm-Message-State: ALQs6tB0YC3rDrkmL+1+63XVFcsdSi/8yjQ6HJPj9BTr9zMvpgmNzEKZ 1eI0kX6JnWKKKakDAWDrzbOu6P6r4NU= X-Google-Smtp-Source: AIpwx49SBYM1zAtpvHx3lGJi/X88n0PCD6jr2XmSXtm1MEy6ooEARpVBbo5OxLvXCbKLCH1pzwrWtw== X-Received: by 2002:a17:902:274a:: with SMTP id j10-v6mr25801585plg.393.1524580793673; Tue, 24 Apr 2018 07:39:53 -0700 (PDT) Received: from localhost.localdomain (i121-115-166-6.s42.a013.ap.plala.or.jp. 
[121.115.166.6]) by smtp.gmail.com with ESMTPSA id o64sm28179970pfb.62.2018.04.24.07.39.52 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Tue, 24 Apr 2018 07:39:53 -0700 (PDT) From: Toshiaki Makita To: netdev@vger.kernel.org Cc: Toshiaki Makita Subject: [PATCH RFC 8/9] veth: Avoid per-packet spinlock of XDP napi ring on dequeueing Date: Tue, 24 Apr 2018 23:39:22 +0900 Message-Id: <20180424143923.26519-9-toshiaki.makita1@gmail.com> X-Mailer: git-send-email 2.14.3 In-Reply-To: <20180424143923.26519-1-toshiaki.makita1@gmail.com> References: <20180424143923.26519-1-toshiaki.makita1@gmail.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Toshiaki Makita Use percpu temporary storage to avoid per-packet spinlock. Signed-off-by: Toshiaki Makita --- drivers/net/veth.c | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 98fc91a64e29..1592119e3873 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -30,6 +30,7 @@ #define VETH_XDP_FLAG 0x1UL #define VETH_RING_SIZE 256 #define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) +#define VETH_XDP_QUEUE_SIZE NAPI_POLL_WEIGHT struct pcpu_vstats { u64 packets; @@ -50,6 +51,8 @@ struct veth_priv { struct xdp_rxq_info xdp_rxq; }; +static DEFINE_PER_CPU(void *[VETH_XDP_QUEUE_SIZE], xdp_consume_q); + static bool veth_is_xdp_frame(void *ptr) { return (unsigned long)ptr & VETH_XDP_FLAG; @@ -563,27 +566,32 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv, static int veth_xdp_rcv(struct veth_priv *priv, int budget, bool *xdp_xmit, bool *xdp_redir) { - int i, done = 0; - - for (i = 0; i < budget; i++) { - void *ptr = ptr_ring_consume(&priv->xdp_ring); - struct sk_buff *skb; - - if (!ptr) - break; + void **q = this_cpu_ptr(xdp_consume_q); + int num, lim, done = 0; + + do { + int i; + + lim = min(budget - done, 
VETH_XDP_QUEUE_SIZE); + num = ptr_ring_consume_batched(&priv->xdp_ring, q, lim); + for (i = 0; i < num; i++) { + struct sk_buff *skb; + void *ptr = q[i]; + + if (veth_is_xdp_frame(ptr)) { + skb = veth_xdp_rcv_one(priv, + veth_ptr_to_xdp(ptr), + xdp_xmit, xdp_redir); + } else { + skb = veth_xdp_rcv_skb(priv, ptr, xdp_xmit, + xdp_redir); + } - if (veth_is_xdp_frame(ptr)) { - skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr), - xdp_xmit, xdp_redir); - } else { - skb = veth_xdp_rcv_skb(priv, ptr, xdp_xmit, xdp_redir); + if (skb) + napi_gro_receive(&priv->xdp_napi, skb); } - - if (skb) - napi_gro_receive(&priv->xdp_napi, skb); - - done++; - } + done += num; + } while (unlikely(num == lim && done < budget)); return done; } From patchwork Tue Apr 24 14:39:23 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Toshiaki Makita X-Patchwork-Id: 903539 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=gmail.com header.i=@gmail.com header.b="F3use0DH"; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40VmGW3jgsz9s02 for ; Wed, 25 Apr 2018 00:40:51 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753426AbeDXOkL (ORCPT ); Tue, 24 Apr 2018 10:40:11 -0400 Received: from mail-pf0-f194.google.com ([209.85.192.194]:35354 "EHLO mail-pf0-f194.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754462AbeDXOjz (ORCPT ); Tue, 24 Apr 
2018 10:39:55 -0400 Received: by mail-pf0-f194.google.com with SMTP id j5so12435973pfh.2 for ; Tue, 24 Apr 2018 07:39:55 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=XIlrRLnvrbt+NyF/5Fo+OAhKn1d2GVwO37Lb8gF5cM8=; b=F3use0DHwK1X7OvmsiDxZymU1qRpoI2nYR8p0pCA5V1SZlumIeqiOKRdiyHfZrCHJe 3i9k2iavuOHJEcnq0Zs/wSSR8iVThxjSfwXxg57i6N36sre5WMVkBZymEp6wYuQqxu0A vd3lIwuta97ZqxuaWRAUw61qYMyIwpOgXEBVL9rtF/AVhZ/dTe998KSYsqg+S1HUiefw LTduQZMKePDVxdJeGS3Je2eVyvrKehq/XNLjkmFU34ftPf73+7KPxRHEQDVFy5RfVd1m x7Fa8a7pPoW6xt5s9nmBPBCp193eUEUE8JYT7oHL0abh2m68c13oC6F8gfcaDsJumscz Zf6A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=XIlrRLnvrbt+NyF/5Fo+OAhKn1d2GVwO37Lb8gF5cM8=; b=JpMmwtVRCSSS9Y8x/45KtFcHcvSbsCvc7j1klnxUx1rCZCKXcvtZnJA/KBJwhFltMb PYa6clVDStc+XCJMn2tMrWZhVDxHsNimMFEzw8+jTXEhhObShJgMC8Z8fvGVFaken+4Y +trJM7wb+Zqub/izs/hpRNgw2ju/yyTjNnKT49f0ZHZTQnwmq+LzatCWs1WMZstjJ0/B 2qlvxh3enPQ+c/CGXoqjCZMi91sOxErlq/wArtUlJlsIasw43UnKC8D7xRk1FCJ9I4qb M/GyFpiRJN9Tt+/IpYqUjs6cBkUMIZkwWdI6ZmeoymzpkFFJ+kiGA/eueOKMfLelPVFy JEEg== X-Gm-Message-State: ALQs6tBo7NHxmMWvAcWwJ37D71Z5zvitqqOem3J3zXncD1iQynY6YCqu KFlGbiuRzcC3yIp44PXiikyT2LYy5tI= X-Google-Smtp-Source: AIpwx4/xMlct4lN84h6d0x46POLNFoWltL4d8K1jhIVBROalEj3RXOubIbG9fZlmK3ysnpgJI+ioyg== X-Received: by 2002:a17:902:9a8b:: with SMTP id w11-v6mr25278766plp.75.1524580795168; Tue, 24 Apr 2018 07:39:55 -0700 (PDT) Received: from localhost.localdomain (i121-115-166-6.s42.a013.ap.plala.or.jp. 
[121.115.166.6]) by smtp.gmail.com with ESMTPSA id o64sm28179970pfb.62.2018.04.24.07.39.53 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Tue, 24 Apr 2018 07:39:54 -0700 (PDT) From: Toshiaki Makita To: netdev@vger.kernel.org Cc: Toshiaki Makita Subject: [PATCH RFC 9/9] veth: Avoid per-packet spinlock of XDP napi ring on enqueueing Date: Tue, 24 Apr 2018 23:39:23 +0900 Message-Id: <20180424143923.26519-10-toshiaki.makita1@gmail.com> X-Mailer: git-send-email 2.14.3 In-Reply-To: <20180424143923.26519-1-toshiaki.makita1@gmail.com> References: <20180424143923.26519-1-toshiaki.makita1@gmail.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Toshiaki Makita Use percpu temporary storage to avoid per-packet spinlock. This differs from the dequeue side in that multiple veth devices can be redirect targets within one napi loop, so the percpu storage is allocated in the veth private structure. Signed-off-by: Toshiaki Makita --- drivers/net/veth.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 1592119e3873..5978d76f2c00 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -38,12 +38,18 @@ struct pcpu_vstats { struct u64_stats_sync syncp; }; +struct xdp_queue { + void *q[VETH_XDP_QUEUE_SIZE]; + unsigned int len; +}; + struct veth_priv { struct napi_struct xdp_napi; struct net_device *dev; struct bpf_prog __rcu *xdp_prog; struct net_device __rcu *peer; atomic64_t dropped; + struct xdp_queue __percpu *xdp_produce_q; struct xdp_mem_info xdp_mem; unsigned requested_headroom; bool rx_notify_masked; @@ -147,8 +153,48 @@ static void veth_ptr_free(void *ptr) } } +static void veth_xdp_cleanup_queues(struct veth_priv *priv) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct xdp_queue *q = per_cpu_ptr(priv->xdp_produce_q, cpu); + int i; + + for (i = 0; i < q->len; i++) + veth_ptr_free(q->q[i]); + + 
q->len = 0; + } +} + +static bool veth_xdp_flush_queue(struct veth_priv *priv) +{ + struct xdp_queue *q = this_cpu_ptr(priv->xdp_produce_q); + int i; + + if (unlikely(!q->len)) + return false; + + spin_lock(&priv->xdp_ring.producer_lock); + for (i = 0; i < q->len; i++) { + void *ptr = q->q[i]; + + if (unlikely(__ptr_ring_produce(&priv->xdp_ring, ptr))) + veth_ptr_free(ptr); + } + spin_unlock(&priv->xdp_ring.producer_lock); + + q->len = 0; + + return true; +} + static void __veth_xdp_flush(struct veth_priv *priv) { + if (unlikely(!veth_xdp_flush_queue(priv))) + return; + /* Write ptr_ring before reading rx_notify_masked */ smp_mb(); if (!priv->rx_notify_masked) { @@ -159,9 +205,13 @@ static void __veth_xdp_flush(struct veth_priv *priv) static int veth_xdp_enqueue(struct veth_priv *priv, void *ptr) { - if (unlikely(ptr_ring_produce(&priv->xdp_ring, ptr))) + struct xdp_queue *q = this_cpu_ptr(priv->xdp_produce_q); + + if (unlikely(q->len >= VETH_XDP_QUEUE_SIZE)) return -ENOSPC; + q->q[q->len++] = ptr; + return 0; } @@ -644,6 +694,7 @@ static void veth_napi_del(struct net_device *dev) napi_disable(&priv->xdp_napi); netif_napi_del(&priv->xdp_napi); + veth_xdp_cleanup_queues(priv); ptr_ring_cleanup(&priv->xdp_ring, veth_ptr_free); } @@ -711,15 +762,28 @@ static int is_valid_veth_mtu(int mtu) static int veth_dev_init(struct net_device *dev) { + struct veth_priv *priv = netdev_priv(dev); + dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats); if (!dev->vstats) return -ENOMEM; + + priv->xdp_produce_q = __alloc_percpu(sizeof(*priv->xdp_produce_q), + sizeof (void *)); + if (!priv->xdp_produce_q) { + free_percpu(dev->vstats); + return -ENOMEM; + } + return 0; } static void veth_dev_free(struct net_device *dev) { + struct veth_priv *priv = netdev_priv(dev); + free_percpu(dev->vstats); + free_percpu(priv->xdp_produce_q); } #ifdef CONFIG_NET_POLL_CONTROLLER