From patchwork Sun Jun 24 05:14:02 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Miller X-Patchwork-Id: 933850 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=davemloft.net Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 41D0pX36zpz9s01 for ; Sun, 24 Jun 2018 15:14:12 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751397AbeFXFOH (ORCPT ); Sun, 24 Jun 2018 01:14:07 -0400 Received: from shards.monkeyblade.net ([23.128.96.9]:57128 "EHLO shards.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751070AbeFXFOG (ORCPT ); Sun, 24 Jun 2018 01:14:06 -0400 Received: from localhost (unknown [211.196.191.92]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) (Authenticated sender: davem-davemloft) by shards.monkeyblade.net (Postfix) with ESMTPSA id 1092E1094A613; Sat, 23 Jun 2018 22:14:04 -0700 (PDT) Date: Sun, 24 Jun 2018 14:14:02 +0900 (KST) Message-Id: <20180624.141402.1501131546272602021.davem@davemloft.net> To: netdev@vger.kernel.org CC: edumazet@google.com Subject: [PATCH RFC 2/2] net: Convert NAPI gro list into a small hash table. From: David Miller X-Mailer: Mew version 6.7 on Emacs 26 / Mule 6.0 (HANACHIRUSATO) Mime-Version: 1.0 X-Greylist: Sender succeeded SMTP AUTH, not delayed by milter-greylist-4.5.12 (shards.monkeyblade.net [149.20.54.216]); Sat, 23 Jun 2018 22:14:06 -0700 (PDT) Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Improve the performance of GRO receive by splitting flows into multiple hash chains. Suggested-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +- net/core/dev.c | 105 ++++++++++++++++++++++++++++---------- 2 files changed, 81 insertions(+), 27 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f176d9873910..c6b377a15869 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -305,6 +305,7 @@ int __init netdev_boot_setup(char *str); /* * Structure for NAPI scheduling similar to tasklet but with weighting */ +#define GRO_HASH_BUCKETS 8 struct napi_struct { /* The poll_list must only be managed by the entity which * changes the state of the NAPI_STATE_SCHED bit. This means @@ -322,7 +323,7 @@ struct napi_struct { int poll_owner; #endif struct net_device *dev; - struct list_head gro_list; + struct list_head gro_hash[GRO_HASH_BUCKETS]; struct sk_buff *skb; struct hrtimer timer; struct list_head dev_list; diff --git a/net/core/dev.c b/net/core/dev.c index aa61b9344b46..dffed642e686 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4875,15 +4875,12 @@ static int napi_gro_complete(struct sk_buff *skb) return netif_receive_skb_internal(skb); } -/* napi->gro_list contains packets ordered by age. - * youngest packets at the head of it. - * Complete skbs in reverse order to reduce latencies. - */ -void napi_gro_flush(struct napi_struct *napi, bool flush_old) +static void __napi_gro_flush_chain(struct napi_struct *napi, struct list_head *head, + bool flush_old) { struct sk_buff *skb, *p; - list_for_each_entry_safe_reverse(skb, p, &napi->gro_list, list) { + list_for_each_entry_safe_reverse(skb, p, head, list) { if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) return; list_del_init(&skb->list); @@ -4891,15 +4888,33 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old) napi->gro_count--; } } + +/* napi->gro_hash contains packets ordered by age. + * youngest packets at the head of it. + * Complete skbs in reverse order to reduce latencies. + */ +void napi_gro_flush(struct napi_struct *napi, bool flush_old) +{ + int i; + + for (i = 0; i < GRO_HASH_BUCKETS; i++) { + struct list_head *head = &napi->gro_hash[i]; + + __napi_gro_flush_chain(napi, head, flush_old); + } +} EXPORT_SYMBOL(napi_gro_flush); -static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) +static struct list_head *gro_list_prepare(struct napi_struct *napi, + struct sk_buff *skb) { unsigned int maclen = skb->dev->hard_header_len; u32 hash = skb_get_hash_raw(skb); + struct list_head *head; struct sk_buff *p; - list_for_each_entry(p, &napi->gro_list, list) { + head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)]; + list_for_each_entry(p, head, list) { unsigned long diffs; NAPI_GRO_CB(p)->flush = 0; @@ -4922,6 +4937,8 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) maclen); NAPI_GRO_CB(p)->same_flow = !diffs; } + + return head; } static void skb_gro_reset_offset(struct sk_buff *skb) @@ -4964,11 +4981,45 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow) } } +static void gro_flush_oldest(struct napi_struct *napi) +{ + struct sk_buff *oldest = NULL; + unsigned long age = jiffies; + int i; + + for (i = 0; i < GRO_HASH_BUCKETS; i++) { + struct list_head *head = &napi->gro_hash[i]; + struct sk_buff *skb; + + if (list_empty(head)) + continue; + + skb = list_last_entry(head, struct sk_buff, list); + if (!oldest || time_before(NAPI_GRO_CB(skb)->age, age)) { + oldest = skb; + age = NAPI_GRO_CB(skb)->age; + } + } + + /* We are called with napi->gro_count >= MAX_GRO_SKBS, so this is + * impossible. + */ + if (WARN_ON_ONCE(!oldest)) + return; + + /* Do not adjust napi->gro_count, caller is adding a new SKB to + * the chain. + */ + list_del(&oldest->list); + napi_gro_complete(oldest); +} + static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct list_head *head = &offload_base; struct packet_offload *ptype; __be16 type = skb->protocol; + struct list_head *gro_head; struct sk_buff *pp = NULL; enum gro_result ret; int same_flow; @@ -4977,7 +5028,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (netif_elide_gro(skb->dev)) goto normal; - gro_list_prepare(napi, skb); + gro_head = gro_list_prepare(napi, skb); rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -5011,7 +5062,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->csum_valid = 0; } - pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); + pp = ptype->callbacks.gro_receive(gro_head, skb); break; } rcu_read_unlock(); @@ -5040,11 +5091,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff goto normal; if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) { - struct sk_buff *nskb; - - nskb = list_last_entry(&napi->gro_list, struct sk_buff, list); - list_del(&nskb->list); - napi_gro_complete(nskb); + gro_flush_oldest(napi); } else { napi->gro_count++; } @@ -5052,7 +5099,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->age = jiffies; NAPI_GRO_CB(skb)->last = skb; skb_shinfo(skb)->gso_size = skb_gro_len(skb); - list_add(&skb->list, &napi->gro_list); + list_add(&skb->list, gro_head); ret = GRO_HELD; pull: @@ -5458,7 +5505,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) NAPIF_STATE_IN_BUSY_POLL))) return false; - if (!list_empty(&n->gro_list)) { + if (n->gro_count) { unsigned long timeout = 0; if (work_done) @@ -5667,7 +5714,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) /* Note : we use a relaxed variant of napi_schedule_prep() not setting * NAPI_STATE_MISSED, since we do not react to a device IRQ. */ - if (!list_empty(&napi->gro_list) && !napi_disable_pending(napi) && + if (napi->gro_count && !napi_disable_pending(napi) && !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) __napi_schedule_irqoff(napi); @@ -5677,11 +5724,14 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { + int i; + INIT_LIST_HEAD(&napi->poll_list); hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); napi->timer.function = napi_watchdog; napi->gro_count = 0; - INIT_LIST_HEAD(&napi->gro_list); + for (i = 0; i < GRO_HASH_BUCKETS; i++) + INIT_LIST_HEAD(&napi->gro_hash[i]); napi->skb = NULL; napi->poll = poll; if (weight > NAPI_POLL_WEIGHT) @@ -5714,12 +5764,16 @@ void napi_disable(struct napi_struct *n) } EXPORT_SYMBOL(napi_disable); -static void gro_list_free(struct list_head *head) +static void flush_gro_hash(struct napi_struct *napi) { - struct sk_buff *skb, *p; + int i; - list_for_each_entry_safe(skb, p, head, list) - kfree_skb(skb); + for (i = 0; i < GRO_HASH_BUCKETS; i++) { + struct sk_buff *skb, *n; + + list_for_each_entry_safe(skb, n, &napi->gro_hash[i], list) + kfree_skb(skb); + } } /* Must be called in process context */ @@ -5731,8 +5785,7 @@ void netif_napi_del(struct napi_struct *napi) list_del_init(&napi->dev_list); napi_free_frags(napi); - gro_list_free(&napi->gro_list); - INIT_LIST_HEAD(&napi->gro_list); + flush_gro_hash(napi); napi->gro_count = 0; } EXPORT_SYMBOL(netif_napi_del); @@ -5775,7 +5828,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) goto out_unlock; } - if (!list_empty(&n->gro_list)) { + if (n->gro_count) { /* flush too old packets * If HZ < 1000, flush all packets. */