From patchwork Thu Dec 7 17:56:23 2017
X-Patchwork-Submitter: John Fastabend
X-Patchwork-Id: 845736
X-Patchwork-Delegate: davem@davemloft.net
Subject: [net-next PATCH 08/14] net: sched: use skb list for skb_bad_tx
From: John Fastabend
To: willemdebruijn.kernel@gmail.com, daniel@iogearbox.net,
    eric.dumazet@gmail.com, davem@davemloft.net
Cc: netdev@vger.kernel.org, jiri@resnulli.us, xiyou.wangcong@gmail.com
Date: Thu, 07 Dec 2017 09:56:23 -0800
Message-ID: <20171207175622.5771.61038.stgit@john-Precision-Tower-5810>
In-Reply-To: <20171207173500.5771.41198.stgit@john-Precision-Tower-5810>
References: <20171207173500.5771.41198.stgit@john-Precision-Tower-5810>
User-Agent: StGit/0.17.1-dirty

Similar to how gso is handled, use an skb list for skb_bad_txq. This is
required with lockless qdiscs because we may have multiple cores
attempting to push skbs into skb_bad_txq concurrently.

Signed-off-by: John Fastabend
---
 include/net/sch_generic.h |    2 -
 net/sched/sch_generic.c   |  106 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 87 insertions(+), 21 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6e329f0..4717c4b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -95,7 +95,7 @@ struct Qdisc {
 	struct gnet_stats_queue	qstats;
 	unsigned long		state;
 	struct Qdisc		*next_sched;
-	struct sk_buff		*skb_bad_txq;
+	struct sk_buff_head	skb_bad_txq;
 	int			padded;
 	refcount_t		refcnt;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 482ba22..84cef05 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -45,6 +45,68 @@
  * - ingress filtering is also serialized via qdisc root lock
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
+
+static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
+{
+	const struct netdev_queue *txq = q->dev_queue;
+	spinlock_t *lock = NULL;
+	struct sk_buff *skb;
+
+	if (q->flags & TCQ_F_NOLOCK) {
+		lock = qdisc_lock(q);
+		spin_lock(lock);
+	}
+
+	skb = skb_peek(&q->skb_bad_txq);
+	if (skb) {
+		/* check the reason of requeuing without tx lock first */
+		txq = skb_get_tx_queue(txq->dev, skb);
+		if (!netif_xmit_frozen_or_stopped(txq)) {
+			skb = __skb_dequeue(&q->skb_bad_txq);
+			if (qdisc_is_percpu_stats(q)) {
+				qdisc_qstats_cpu_backlog_dec(q, skb);
+				qdisc_qstats_cpu_qlen_dec(q);
+			} else {
+				qdisc_qstats_backlog_dec(q, skb);
+				q->q.qlen--;
+			}
+		} else {
+			skb = NULL;
+		}
+	}
+
+	if (lock)
+		spin_unlock(lock);
+
+	return skb;
+}
+
+static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
+{
+	struct sk_buff *skb = skb_peek(&q->skb_bad_txq);
+
+	if (unlikely(skb))
+		skb = __skb_dequeue_bad_txq(q);
+
+	return skb;
+}
+
+static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
+					     struct sk_buff *skb)
+{
+	spinlock_t *lock = NULL;
+
+	if (q->flags & TCQ_F_NOLOCK) {
+		lock = qdisc_lock(q);
+		spin_lock(lock);
+	}
+
+	__skb_queue_tail(&q->skb_bad_txq, skb);
+
+	if (lock)
+		spin_unlock(lock);
+}
+
 static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	__skb_queue_head(&q->gso_skb, skb);
@@ -117,9 +179,15 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
 		if (!nskb)
 			break;
 		if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
-			q->skb_bad_txq = nskb;
-			qdisc_qstats_backlog_inc(q, nskb);
-			q->q.qlen++;
+			qdisc_enqueue_skb_bad_txq(q, nskb);
+
+			if (qdisc_is_percpu_stats(q)) {
+				qdisc_qstats_cpu_backlog_inc(q, nskb);
+				qdisc_qstats_cpu_qlen_inc(q);
+			} else {
+				qdisc_qstats_backlog_inc(q, nskb);
+				q->q.qlen++;
+			}
 			break;
 		}
 		skb->next = nskb;
@@ -180,19 +248,9 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 	}
 validate:
 	*validate = true;
-	skb = q->skb_bad_txq;
-	if (unlikely(skb)) {
-		/* check the reason of requeuing without tx lock first */
-		txq = skb_get_tx_queue(txq->dev, skb);
-		if (!netif_xmit_frozen_or_stopped(txq)) {
-			q->skb_bad_txq = NULL;
-			qdisc_qstats_backlog_dec(q, skb);
-			q->q.qlen--;
-			goto bulk;
-		}
-		skb = NULL;
-		goto trace;
-	}
+	skb = qdisc_dequeue_skb_bad_txq(q);
+	if (unlikely(skb))
+		goto bulk;
 	if (!(q->flags & TCQ_F_ONETXQUEUE) ||
 	    !netif_xmit_frozen_or_stopped(txq))
 		skb = q->dequeue(q);
@@ -680,6 +738,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 		sch->padded = (char *) sch - (char *) p;
 	}
 	__skb_queue_head_init(&sch->gso_skb);
+	__skb_queue_head_init(&sch->skb_bad_txq);
 	qdisc_skb_head_init(&sch->q);
 	spin_lock_init(&sch->q.lock);
@@ -753,14 +812,16 @@ void qdisc_reset(struct Qdisc *qdisc)
 	if (ops->reset)
 		ops->reset(qdisc);
 
-	kfree_skb(qdisc->skb_bad_txq);
-	qdisc->skb_bad_txq = NULL;
-
 	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
 		__skb_unlink(skb, &qdisc->gso_skb);
 		kfree_skb_list(skb);
 	}
+	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
+		__skb_unlink(skb, &qdisc->skb_bad_txq);
+		kfree_skb_list(skb);
+	}
+
 	qdisc->q.qlen = 0;
 	qdisc->qstats.backlog = 0;
@@ -804,7 +865,11 @@ void qdisc_destroy(struct Qdisc *qdisc)
 		kfree_skb_list(skb);
 	}
 
-	kfree_skb(qdisc->skb_bad_txq);
+	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
+		__skb_unlink(skb, &qdisc->skb_bad_txq);
+		kfree_skb_list(skb);
+	}
+
 	qdisc_free(qdisc);
 }
 EXPORT_SYMBOL(qdisc_destroy);
@@ -1042,6 +1107,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
 	rcu_assign_pointer(dev_queue->qdisc, qdisc);
 	dev_queue->qdisc_sleeping = qdisc;
 	__skb_queue_head_init(&qdisc->gso_skb);
+	__skb_queue_head_init(&qdisc->skb_bad_txq);
 }
 
 void dev_init_scheduler(struct net_device *dev)
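
The helpers in this patch share one pattern worth spelling out:
qdisc_enqueue_skb_bad_txq() and __skb_dequeue_bad_txq() take qdisc_lock(q)
only when TCQ_F_NOLOCK is set, because only then may several cores touch
skb_bad_txq concurrently; a locked qdisc is already serialized by its root
lock. Below is a minimal self-contained userspace sketch of that
conditional-locking FIFO, assuming a pthread environment. Every name in it
(fake_qdisc, FAKE_F_NOLOCK, struct pkt, struct pkt_queue) is a hypothetical
stand-in for the kernel object it mirrors (struct Qdisc, TCQ_F_NOLOCK,
struct sk_buff, struct sk_buff_head); it illustrates the idea and is not
kernel API.

/* Userspace sketch, NOT kernel code: all names are hypothetical
 * stand-ins for the kernel objects named in the lead-in above. */
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct pkt {				/* stand-in for struct sk_buff */
	struct pkt *next;
	int id;
};

struct pkt_queue {			/* stand-in for struct sk_buff_head */
	struct pkt *head;
	struct pkt *tail;
};

#define FAKE_F_NOLOCK 0x1		/* stand-in for TCQ_F_NOLOCK */

struct fake_qdisc {			/* stand-in for struct Qdisc */
	unsigned int flags;
	pthread_mutex_t lock;		/* stand-in for qdisc_lock(q) */
	struct pkt_queue bad_txq;
};

/* Mirrors qdisc_enqueue_skb_bad_txq(): lock only when the qdisc runs
 * lockless, since only then can several cores requeue packets into
 * bad_txq at the same time. */
static void enqueue_bad_txq(struct fake_qdisc *q, struct pkt *p)
{
	bool need_lock = q->flags & FAKE_F_NOLOCK;

	if (need_lock)
		pthread_mutex_lock(&q->lock);

	p->next = NULL;			/* append at the tail (FIFO) */
	if (q->bad_txq.tail)
		q->bad_txq.tail->next = p;
	else
		q->bad_txq.head = p;
	q->bad_txq.tail = p;

	if (need_lock)
		pthread_mutex_unlock(&q->lock);
}

/* Mirrors __skb_dequeue_bad_txq(): same conditional locking on the
 * consumer side; returns NULL when the queue is empty. */
static struct pkt *dequeue_bad_txq(struct fake_qdisc *q)
{
	bool need_lock = q->flags & FAKE_F_NOLOCK;
	struct pkt *p;

	if (need_lock)
		pthread_mutex_lock(&q->lock);

	p = q->bad_txq.head;		/* pop from the head (FIFO) */
	if (p) {
		q->bad_txq.head = p->next;
		if (!q->bad_txq.head)
			q->bad_txq.tail = NULL;
	}

	if (need_lock)
		pthread_mutex_unlock(&q->lock);

	return p;
}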
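
A short driver for the sketch, again purely illustrative:

int main(void)
{
	struct fake_qdisc q = {
		.flags = FAKE_F_NOLOCK,		/* exercise the locked path */
		.lock = PTHREAD_MUTEX_INITIALIZER,
	};
	struct pkt a = { .id = 1 }, b = { .id = 2 };
	struct pkt *p;

	/* e.g. two packets requeued because of a bad queue mapping */
	enqueue_bad_txq(&q, &a);
	enqueue_bad_txq(&q, &b);

	while ((p = dequeue_bad_txq(&q)))
		printf("dequeued pkt %d\n", p->id);

	return 0;
}

Note also the fast path the patch keeps: qdisc_dequeue_skb_bad_txq() does
an unlocked skb_peek() first and only enters the locked slow path
(__skb_dequeue_bad_txq()) when the queue looks non-empty, so the common
case of an empty skb_bad_txq costs no lock operations.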