From patchwork Fri Jul 18 14:20:38 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ming Lei X-Patchwork-Id: 371556 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) by ozlabs.org (Postfix) with ESMTP id D7E1514010C; Sat, 19 Jul 2014 00:21:12 +1000 (EST) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.76) (envelope-from ) id 1X8924-0003ER-B2; Fri, 18 Jul 2014 14:21:08 +0000 Received: from mail-pd0-f171.google.com ([209.85.192.171]) by huckleberry.canonical.com with esmtp (Exim 4.76) (envelope-from ) id 1X891z-00039e-HA for kernel-team@lists.ubuntu.com; Fri, 18 Jul 2014 14:21:03 +0000 Received: by mail-pd0-f171.google.com with SMTP id z10so5103905pdj.16 for ; Fri, 18 Jul 2014 07:21:02 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=6zVm6iVT3dQyKCnEE+siZfA+4luiiwBb1F208WpU/mE=; b=YfzEPURRLn/jq4yrUXdwnuzl5q6spkwOQpHm08T5QusoOVMkXsSgLrw9I9zh89B7Uj UNridYbMh4sp5AkfmrZYBumelE0n3oTucj48Jvedw1qwJq9J7sq0Q+9X3BGwjbpmFsDc z1716YUCPm7D6/UcINpRm2+cZ9nSJxKOi9JQJIk3tgIhCPJ7xMQ9FNDcixC7lpvSLgKr Clmeop4TKo0yyHSNyMg5cQwwUla5GpV+bhZSk4C8P2jrz5TwHLXk2B/oG7xXdtlNrUOC gdo8YdQ6QbrleMd87W8DdxYY+UIPOKqO2heGajNUm92vqX6uQsHZgUt/dWeMm7VvXn3g EGQA== X-Received: by 10.70.138.45 with SMTP id qn13mr5635906pdb.20.1405693262408; Fri, 18 Jul 2014 07:21:02 -0700 (PDT) Received: from localhost ([183.14.17.82]) by mx.google.com with ESMTPSA id uj2sm23646417pab.14.2014.07.18.07.20.59 for (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Fri, 18 Jul 2014 07:21:01 -0700 (PDT) From: Ming Lei To: kernel-team@lists.ubuntu.com Subject: [PATCH trusty SRU 1/5] net/mlx4_core: Enforce irq affinity changes immediatly Date: Fri, 18 Jul 2014 22:20:38 +0800 Message-Id: <1405693242-3929-2-git-send-email-ming.lei@canonical.com> X-Mailer: git-send-email 1.7.9.5 In-Reply-To: <1405693242-3929-1-git-send-email-ming.lei@canonical.com> References: <1405693242-3929-1-git-send-email-ming.lei@canonical.com> X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.14 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: kernel-team-bounces@lists.ubuntu.com From: Yuval Atias During heavy traffic, napi is constatntly polling the complition queue and no interrupt is fired. Because of that, changes to irq affinity are ignored until traffic is stopped and resumed. By registering to the irq notifier mechanism, and forcing interrupt when affinity is changed, irq affinity changes will be immediatly enforced. Signed-off-by: Yuval Atias Signed-off-by: Amir Vadai Signed-off-by: David S. Miller (cherry picked from commit 2eacc23c422b4553030168f315cb49522fa1b1f6) Signed-off-by: Ming Lei --- drivers/net/ethernet/mellanox/mlx4/cq.c | 3 ++ drivers/net/ethernet/mellanox/mlx4/en_rx.c | 11 ++++- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 +++ drivers/net/ethernet/mellanox/mlx4/eq.c | 62 ++++++++++++++++++++++++++++ include/linux/mlx4/device.h | 3 ++ 5 files changed, 83 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 34c5b87..111b184 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -294,6 +294,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, atomic_set(&cq->refcount, 1); init_completion(&cq->free); + cq->irq = priv->eq_table.eq[cq->vector].irq; + cq->irq_affinity_change = false; + return 0; err_radix: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index d7ab0fe..f9e1226 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -894,10 +894,17 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) mlx4_en_cq_unlock_napi(cq); /* If we used up all the quota - we're probably not done yet... */ - if (done == budget) + if (done == budget) { INC_PERF_COUNTER(priv->pstats.napi_quota); - else { + if (unlikely(cq->mcq.irq_affinity_change)) { + cq->mcq.irq_affinity_change = false; + napi_complete(napi); + mlx4_en_arm_cq(priv, cq); + return 0; + } + } else { /* Done for now */ + cq->mcq.irq_affinity_change = false; napi_complete(napi); mlx4_en_arm_cq(priv, cq); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index dd1f6d3..493d9f8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -474,9 +474,15 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget) /* If we used up all the quota - we're probably not done yet... */ if (done < budget) { /* Done for now */ + cq->mcq.irq_affinity_change = false; napi_complete(napi); mlx4_en_arm_cq(priv, cq); return done; + } else if (unlikely(cq->mcq.irq_affinity_change)) { + cq->mcq.irq_affinity_change = false; + napi_complete(napi); + mlx4_en_arm_cq(priv, cq); + return 0; } return budget; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index f856e26..46af0e6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -54,6 +54,11 @@ enum { MLX4_EQ_ENTRY_SIZE = 0x20 }; +struct mlx4_irq_notify { + void *arg; + struct irq_affinity_notify notify; +}; + #define MLX4_EQ_STATUS_OK ( 0 << 28) #define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28) #define MLX4_EQ_OWNER_SW ( 0 << 24) @@ -1103,6 +1108,57 @@ static void mlx4_unmap_clr_int(struct mlx4_dev *dev) iounmap(priv->clr_base); } +static void mlx4_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct mlx4_irq_notify *n = container_of(notify, + struct mlx4_irq_notify, + notify); + struct mlx4_priv *priv = (struct mlx4_priv *)n->arg; + struct radix_tree_iter iter; + void **slot; + + radix_tree_for_each_slot(slot, &priv->cq_table.tree, &iter, 0) { + struct mlx4_cq *cq = (struct mlx4_cq *)(*slot); + + if (cq->irq == notify->irq) + cq->irq_affinity_change = true; + } +} + +static void mlx4_release_irq_notifier(struct kref *ref) +{ + struct mlx4_irq_notify *n = container_of(ref, struct mlx4_irq_notify, + notify.kref); + kfree(n); +} + +static void mlx4_assign_irq_notifier(struct mlx4_priv *priv, + struct mlx4_dev *dev, int irq) +{ + struct mlx4_irq_notify *irq_notifier = NULL; + int err = 0; + + irq_notifier = kzalloc(sizeof(*irq_notifier), GFP_KERNEL); + if (!irq_notifier) { + mlx4_warn(dev, "Failed to allocate irq notifier. irq %d\n", + irq); + return; + } + + irq_notifier->notify.irq = irq; + irq_notifier->notify.notify = mlx4_irq_notifier_notify; + irq_notifier->notify.release = mlx4_release_irq_notifier; + irq_notifier->arg = priv; + err = irq_set_affinity_notifier(irq, &irq_notifier->notify); + if (err) { + kfree(irq_notifier); + irq_notifier = NULL; + mlx4_warn(dev, "Failed to set irq notifier. irq %d\n", irq); + } +} + + int mlx4_alloc_eq_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1373,6 +1429,9 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, continue; /*we dont want to break here*/ } + mlx4_assign_irq_notifier(priv, dev, + priv->eq_table.eq[vec].irq); + eq_set_ci(&priv->eq_table.eq[vec], 1); } } @@ -1399,6 +1458,9 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec) Belonging to a legacy EQ*/ mutex_lock(&priv->msix_ctl.pool_lock); if (priv->msix_ctl.pool_bm & 1ULL << i) { + irq_set_affinity_notifier( + priv->eq_table.eq[vec].irq, + NULL); free_irq(priv->eq_table.eq[vec].irq, &priv->eq_table.eq[vec]); priv->msix_ctl.pool_bm &= ~(1ULL << i); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ba87bd2..c0468e6 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -577,6 +577,9 @@ struct mlx4_cq { u32 cons_index; + u16 irq; + bool irq_affinity_change; + __be32 *set_ci_db; __be32 *arm_db; int arm_sn;