From patchwork Thu Feb 14 20:47:56 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 1042431 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=linutronix.de Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 440qTk5PxQz9s7h for ; Fri, 15 Feb 2019 08:36:42 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2440252AbfBNVgk (ORCPT ); Thu, 14 Feb 2019 16:36:40 -0500 Received: from Galois.linutronix.de ([146.0.238.70]:51337 "EHLO Galois.linutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2395168AbfBNVgi (ORCPT ); Thu, 14 Feb 2019 16:36:38 -0500 Received: from localhost ([127.0.0.1] helo=nanos.tec.linutronix.de) by Galois.linutronix.de with esmtp (Exim 4.80) (envelope-from ) id 1guOgB-0002C5-6G; Thu, 14 Feb 2019 22:36:23 +0100 Message-Id: <20190214211759.417990441@linutronix.de> User-Agent: quilt/0.65 Date: Thu, 14 Feb 2019 21:47:56 +0100 From: Thomas Gleixner To: LKML Cc: Ming Lei , Christoph Hellwig , Bjorn Helgaas , Jens Axboe , linux-block@vger.kernel.org, Sagi Grimberg , linux-nvme@lists.infradead.org, linux-pci@vger.kernel.org, Keith Busch , Marc Zyngier , Sumit Saxena , Kashyap Desai , Shivasharan Srikanteshwara Subject: [patch V5 1/8] genirq/affinity: Code consolidation References: <20190214204755.819014197@linutronix.de> MIME-Version: 1.0 Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org All information and calculations in the interrupt affinity spreading code is strictly unsigned int. Though the code uses int all over the place. Convert it over to unsigned int. Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 20 +++++++++------- kernel/irq/affinity.c | 56 ++++++++++++++++++++++------------------------ 2 files changed, 38 insertions(+), 38 deletions(-) --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -251,10 +251,10 @@ struct irq_affinity_notify { * @sets: Number of affinitized sets */ struct irq_affinity { - int pre_vectors; - int post_vectors; - int nr_sets; - int *sets; + unsigned int pre_vectors; + unsigned int post_vectors; + unsigned int nr_sets; + unsigned int *sets; }; /** @@ -314,9 +314,10 @@ extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); struct irq_affinity_desc * -irq_create_affinity_masks(int nvec, const struct irq_affinity *affd); +irq_create_affinity_masks(unsigned int nvec, const struct irq_affinity *affd); -int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd); +unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, + const struct irq_affinity *affd); #else /* CONFIG_SMP */ @@ -350,13 +351,14 @@ irq_set_affinity_notifier(unsigned int i } static inline struct irq_affinity_desc * -irq_create_affinity_masks(int nvec, const struct irq_affinity *affd) +irq_create_affinity_masks(unsigned int nvec, const struct irq_affinity *affd) { return NULL; } -static inline int -irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd) +static inline unsigned int +irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, + const struct irq_affinity *affd) { return maxvec; } --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -9,7 +9,7 @@ #include static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, - int cpus_per_vec) + unsigned int cpus_per_vec) { const struct cpumask *siblmsk; int cpu, sibl; @@ -95,15 +95,17 @@ static int get_nodes_in_cpumask(cpumask_ } static int __irq_build_affinity_masks(const struct irq_affinity *affd, - int startvec, int numvecs, int firstvec, + unsigned int startvec, + unsigned int numvecs, + unsigned int firstvec, cpumask_var_t *node_to_cpumask, const struct cpumask *cpu_mask, struct cpumask *nmsk, struct irq_affinity_desc *masks) { - int n, nodes, cpus_per_vec, extra_vecs, done = 0; - int last_affv = firstvec + numvecs; - int curvec = startvec; + unsigned int n, nodes, cpus_per_vec, extra_vecs, done = 0; + unsigned int last_affv = firstvec + numvecs; + unsigned int curvec = startvec; nodemask_t nodemsk = NODE_MASK_NONE; if (!cpumask_weight(cpu_mask)) @@ -117,18 +119,16 @@ static int __irq_build_affinity_masks(co */ if (numvecs <= nodes) { for_each_node_mask(n, nodemsk) { - cpumask_or(&masks[curvec].mask, - &masks[curvec].mask, - node_to_cpumask[n]); + cpumask_or(&masks[curvec].mask, &masks[curvec].mask, + node_to_cpumask[n]); if (++curvec == last_affv) curvec = firstvec; } - done = numvecs; - goto out; + return numvecs; } for_each_node_mask(n, nodemsk) { - int ncpus, v, vecs_to_assign, vecs_per_node; + unsigned int ncpus, v, vecs_to_assign, vecs_per_node; /* Spread the vectors per node */ vecs_per_node = (numvecs - (curvec - firstvec)) / nodes; @@ -163,8 +163,6 @@ static int __irq_build_affinity_masks(co curvec = firstvec; --nodes; } - -out: return done; } @@ -174,13 +172,14 @@ static int __irq_build_affinity_masks(co * 2) spread other possible CPUs on these vectors */ static int irq_build_affinity_masks(const struct irq_affinity *affd, - int startvec, int numvecs, int firstvec, + unsigned int startvec, unsigned int numvecs, + unsigned int firstvec, struct irq_affinity_desc *masks) { - int curvec = startvec, nr_present, nr_others; - int ret = -ENOMEM; - cpumask_var_t nmsk, npresmsk; + unsigned int curvec = startvec, nr_present, nr_others; cpumask_var_t *node_to_cpumask; + cpumask_var_t nmsk, npresmsk; + int ret = -ENOMEM; if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return ret; @@ -239,12 +238,10 @@ static int irq_build_affinity_masks(cons * Returns the irq_affinity_desc pointer or NULL if allocation failed. */ struct irq_affinity_desc * -irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) +irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd) { - int affvecs = nvecs - affd->pre_vectors - affd->post_vectors; - int curvec, usedvecs; + unsigned int affvecs, curvec, usedvecs, nr_sets, i; struct irq_affinity_desc *masks = NULL; - int i, nr_sets; /* * If there aren't any vectors left after applying the pre/post @@ -264,16 +261,17 @@ irq_create_affinity_masks(int nvecs, con * Spread on present CPUs starting from affd->pre_vectors. If we * have multiple sets, build each sets affinity mask separately. */ + affvecs = nvecs - affd->pre_vectors - affd->post_vectors; nr_sets = affd->nr_sets; if (!nr_sets) nr_sets = 1; for (i = 0, usedvecs = 0; i < nr_sets; i++) { - int this_vecs = affd->sets ? affd->sets[i] : affvecs; + unsigned int this_vecs = affd->sets ? affd->sets[i] : affvecs; int ret; ret = irq_build_affinity_masks(affd, curvec, this_vecs, - curvec, masks); + curvec, masks); if (ret) { kfree(masks); return NULL; @@ -303,17 +301,17 @@ irq_create_affinity_masks(int nvecs, con * @maxvec: The maximum number of vectors available * @affd: Description of the affinity requirements */ -int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd) +unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, + const struct irq_affinity *affd) { - int resv = affd->pre_vectors + affd->post_vectors; - int vecs = maxvec - resv; - int set_vecs; + unsigned int resv = affd->pre_vectors + affd->post_vectors; + unsigned int set_vecs; if (resv > minvec) return 0; if (affd->nr_sets) { - int i; + unsigned int i; for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) set_vecs += affd->sets[i]; @@ -323,5 +321,5 @@ int irq_calc_affinity_vectors(int minvec put_online_cpus(); } - return resv + min(set_vecs, vecs); + return resv + min(set_vecs, maxvec - resv); } From patchwork Thu Feb 14 20:47:57 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 1042430 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=linutronix.de Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 440qTg05Dzz9s7h for ; Fri, 15 Feb 2019 08:36:38 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2437831AbfBNVgh (ORCPT ); Thu, 14 Feb 2019 16:36:37 -0500 Received: from Galois.linutronix.de ([146.0.238.70]:51333 "EHLO Galois.linutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2395160AbfBNVgh (ORCPT ); Thu, 14 Feb 2019 16:36:37 -0500 Received: from localhost ([127.0.0.1] helo=nanos.tec.linutronix.de) by Galois.linutronix.de with esmtp (Exim 4.80) (envelope-from ) id 1guOgC-0002C8-3R; Thu, 14 Feb 2019 22:36:24 +0100 Message-Id: <20190214211759.507891465@linutronix.de> User-Agent: quilt/0.65 Date: Thu, 14 Feb 2019 21:47:57 +0100 From: Thomas Gleixner To: LKML Cc: Ming Lei , Christoph Hellwig , Bjorn Helgaas , Jens Axboe , linux-block@vger.kernel.org, Sagi Grimberg , linux-nvme@lists.infradead.org, linux-pci@vger.kernel.org, Keith Busch , Marc Zyngier , Sumit Saxena , Kashyap Desai , Shivasharan Srikanteshwara Subject: [patch V5 2/8] genirq/affinity: Store interrupt sets size in struct irq_affinity References: <20190214204755.819014197@linutronix.de> MIME-Version: 1.0 Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org From: Ming Lei The interrupt affinity spreading mechanism supports to spread out affinities for one or more interrupt sets. A interrupt set contains one or more interrupts. Each set is mapped to a specific functionality of a device, e.g. general I/O queues and read I/O queus of multiqueue block devices. The number of interrupts per set is defined by the driver. It depends on the total number of available interrupts for the device, which is determined by the PCI capabilites and the availability of underlying CPU resources, and the number of queues which the device provides and the driver wants to instantiate. The driver passes initial configuration for the interrupt allocation via a pointer to struct irq_affinity. Right now the allocation mechanism is complex as it requires to have a loop in the driver to determine the maximum number of interrupts which are provided by the PCI capabilities and the underlying CPU resources. This loop would have to be replicated in every driver which wants to utilize this mechanism. That's unwanted code duplication and error prone. In order to move this into generic facilities it is required to have a mechanism, which allows the recalculation of the interrupt sets and their size, in the core code. As the core code does not have any knowledge about the underlying device, a driver specific callback will be added to struct affinity_desc, which will be invoked by the core code. The callback will get the number of available interupts as an argument, so the driver can calculate the corresponding number and size of interrupt sets. To support this, two modifications for the handling of struct irq_affinity are required: 1) The (optional) interrupt sets size information is contained in a separate array of integers and struct irq_affinity contains a pointer to it. This is cumbersome and as the maximum number of interrupt sets is small, there is no reason to have separate storage. Moving the size array into struct affinity_desc avoids indirections and makes the code simpler. 2) At the moment the struct irq_affinity pointer which is handed in from the driver and passed through to several core functions is marked 'const'. With the upcoming callback to recalculate the number and size of interrupt sets, it's necessary to remove the 'const' qualifier. Otherwise the callback would not be able to update the data. Implement #1 and store the interrupt sets size in 'struct irq_affinity'. No functional change. [ tglx: Fixed the memcpy() size so it won't copy beyond the size of the source. Fixed the kernel doc comments for struct irq_affinity and de-'This patch'-ed the changelog ] Signed-off-by: Ming Lei Signed-off-by: Thomas Gleixner --- drivers/nvme/host/pci.c | 7 +++---- include/linux/interrupt.h | 9 ++++++--- kernel/irq/affinity.c | 16 ++++++++++++---- 3 files changed, 21 insertions(+), 11 deletions(-) --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2081,12 +2081,11 @@ static void nvme_calc_io_queues(struct n static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) { struct pci_dev *pdev = to_pci_dev(dev->dev); - int irq_sets[2]; struct irq_affinity affd = { - .pre_vectors = 1, - .nr_sets = ARRAY_SIZE(irq_sets), - .sets = irq_sets, + .pre_vectors = 1, + .nr_sets = 2, }; + unsigned int *irq_sets = affd.set_size; int result = 0; unsigned int irq_queues, this_p_queues; --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -241,20 +241,23 @@ struct irq_affinity_notify { void (*release)(struct kref *ref); }; +#define IRQ_AFFINITY_MAX_SETS 4 + /** * struct irq_affinity - Description for automatic irq affinity assignements * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of * the MSI(-X) vector space * @post_vectors: Don't apply affinity to @post_vectors at end of * the MSI(-X) vector space - * @nr_sets: Length of passed in *sets array - * @sets: Number of affinitized sets + * @nr_sets: The number of interrupt sets for which affinity + * spreading is required + * @set_size: Array holding the size of each interrupt set */ struct irq_affinity { unsigned int pre_vectors; unsigned int post_vectors; unsigned int nr_sets; - unsigned int *sets; + unsigned int set_size[IRQ_AFFINITY_MAX_SETS]; }; /** --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -238,9 +238,10 @@ static int irq_build_affinity_masks(cons * Returns the irq_affinity_desc pointer or NULL if allocation failed. */ struct irq_affinity_desc * -irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd) +irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) { unsigned int affvecs, curvec, usedvecs, nr_sets, i; + unsigned int set_size[IRQ_AFFINITY_MAX_SETS]; struct irq_affinity_desc *masks = NULL; /* @@ -250,6 +251,9 @@ irq_create_affinity_masks(unsigned int n if (nvecs == affd->pre_vectors + affd->post_vectors) return NULL; + if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS)) + return NULL; + masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); if (!masks) return NULL; @@ -263,11 +267,15 @@ irq_create_affinity_masks(unsigned int n */ affvecs = nvecs - affd->pre_vectors - affd->post_vectors; nr_sets = affd->nr_sets; - if (!nr_sets) + if (!nr_sets) { nr_sets = 1; + set_size[0] = affvecs; + } else { + memcpy(set_size, affd->set_size, nr_sets * sizeof(unsigned int)); + } for (i = 0, usedvecs = 0; i < nr_sets; i++) { - unsigned int this_vecs = affd->sets ? affd->sets[i] : affvecs; + unsigned int this_vecs = set_size[i]; int ret; ret = irq_build_affinity_masks(affd, curvec, this_vecs, @@ -314,7 +322,7 @@ unsigned int irq_calc_affinity_vectors(u unsigned int i; for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) - set_vecs += affd->sets[i]; + set_vecs += affd->set_size[i]; } else { get_online_cpus(); set_vecs = cpumask_weight(cpu_possible_mask); From patchwork Thu Feb 14 20:47:58 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 1042438 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=linutronix.de Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 440qVf1Bywz9sCh for ; Fri, 15 Feb 2019 08:37:30 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2440256AbfBNVgl (ORCPT ); Thu, 14 Feb 2019 16:36:41 -0500 Received: from Galois.linutronix.de ([146.0.238.70]:51334 "EHLO Galois.linutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2436452AbfBNVgk (ORCPT ); Thu, 14 Feb 2019 16:36:40 -0500 Received: from localhost ([127.0.0.1] helo=nanos.tec.linutronix.de) by Galois.linutronix.de with esmtp (Exim 4.80) (envelope-from ) id 1guOgD-0002CB-1b; Thu, 14 Feb 2019 22:36:25 +0100 Message-Id: <20190214211759.595619244@linutronix.de> User-Agent: quilt/0.65 Date: Thu, 14 Feb 2019 21:47:58 +0100 From: Thomas Gleixner To: LKML Cc: Ming Lei , Christoph Hellwig , Bjorn Helgaas , Jens Axboe , linux-block@vger.kernel.org, Sagi Grimberg , linux-nvme@lists.infradead.org, linux-pci@vger.kernel.org, Keith Busch , Marc Zyngier , Sumit Saxena , Kashyap Desai , Shivasharan Srikanteshwara Subject: [patch V5 3/8] genirq/affinity: Add new callback for (re)calculating interrupt sets References: <20190214204755.819014197@linutronix.de> MIME-Version: 1.0 Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org From: Ming Lei The interrupt affinity spreading mechanism supports to spread out affinities for one or more interrupt sets. A interrupt set contains one or more interrupts. Each set is mapped to a specific functionality of a device, e.g. general I/O queues and read I/O queus of multiqueue block devices. The number of interrupts per set is defined by the driver. It depends on the total number of available interrupts for the device, which is determined by the PCI capabilites and the availability of underlying CPU resources, and the number of queues which the device provides and the driver wants to instantiate. The driver passes initial configuration for the interrupt allocation via a pointer to struct irq_affinity. Right now the allocation mechanism is complex as it requires to have a loop in the driver to determine the maximum number of interrupts which are provided by the PCI capabilities and the underlying CPU resources. This loop would have to be replicated in every driver which wants to utilize this mechanism. That's unwanted code duplication and error prone. In order to move this into generic facilities it is required to have a mechanism, which allows the recalculation of the interrupt sets and their size, in the core code. As the core code does not have any knowledge about the underlying device, a driver specific callback will be added to struct irq_affinity, which will be invoked by the core code. The callback will get the number of available interupts as an argument, so the driver can calculate the corresponding number and size of interrupt sets. At the moment the struct irq_affinity pointer which is handed in from the driver and passed through to several core functions is marked 'const', but for the callback to be able to modify the data in the struct it's required to remove the 'const' qualifier. Add the optional callback to struct irq_affinity, which allows drivers to recalculate the number and size of interrupt sets and remove the 'const' qualifier. For simple invocations, which do not supply a callback, a default callback is installed, which just sets nr_sets to 1 and transfers the number of spreadable vectors to the set_size array at index 0. This is for now guarded by a check for nr_sets != 0 to keep the NVME driver working until it is converted to the callback mechanism. [ tglx: Fixed the simple case (no sets required). Moved the sanity check for nr_sets after the invocation of the callback so it catches broken drivers. Fixed the kernel doc comments for struct irq_affinity and de-'This patch'-ed the changelog ] Signed-off-by: Ming Lei Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 18 ++++++++-------- drivers/scsi/be2iscsi/be_main.c | 2 - include/linux/interrupt.h | 10 +++++++- include/linux/pci.h | 4 +-- kernel/irq/affinity.c | 45 +++++++++++++++++++++++++++------------- 5 files changed, 51 insertions(+), 28 deletions(-) --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -532,7 +532,7 @@ static int populate_msi_sysfs(struct pci } static struct msi_desc * -msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd) +msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd) { struct irq_affinity_desc *masks = NULL; struct msi_desc *entry; @@ -597,7 +597,7 @@ static int msi_verify_entries(struct pci * which could have been allocated. */ static int msi_capability_init(struct pci_dev *dev, int nvec, - const struct irq_affinity *affd) + struct irq_affinity *affd) { struct msi_desc *entry; int ret; @@ -669,7 +669,7 @@ static void __iomem *msix_map_region(str static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, struct msix_entry *entries, int nvec, - const struct irq_affinity *affd) + struct irq_affinity *affd) { struct irq_affinity_desc *curmsk, *masks = NULL; struct msi_desc *entry; @@ -736,7 +736,7 @@ static void msix_program_entries(struct * requested MSI-X entries with allocated irqs or non-zero for otherwise. **/ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, - int nvec, const struct irq_affinity *affd) + int nvec, struct irq_affinity *affd) { int ret; u16 control; @@ -932,7 +932,7 @@ int pci_msix_vec_count(struct pci_dev *d EXPORT_SYMBOL(pci_msix_vec_count); static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, - int nvec, const struct irq_affinity *affd) + int nvec, struct irq_affinity *affd) { int nr_entries; int i, j; @@ -1018,7 +1018,7 @@ int pci_msi_enabled(void) EXPORT_SYMBOL(pci_msi_enabled); static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, - const struct irq_affinity *affd) + struct irq_affinity *affd) { int nvec; int rc; @@ -1086,7 +1086,7 @@ EXPORT_SYMBOL(pci_enable_msi); static int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, - int maxvec, const struct irq_affinity *affd) + int maxvec, struct irq_affinity *affd) { int rc, nvec = maxvec; @@ -1165,9 +1165,9 @@ EXPORT_SYMBOL(pci_enable_msix_range); */ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, - const struct irq_affinity *affd) + struct irq_affinity *affd) { - static const struct irq_affinity msi_default_affd; + struct irq_affinity msi_default_affd = {0}; int msix_vecs = -ENOSPC; int msi_vecs = -ENOSPC; --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -3566,7 +3566,7 @@ static void be2iscsi_enable_msix(struct /* if eqid_count == 1 fall back to INTX */ if (enable_msix && nvec > 1) { - const struct irq_affinity desc = { .post_vectors = 1 }; + struct irq_affinity desc = { .post_vectors = 1 }; if (pci_alloc_irq_vectors_affinity(phba->pcidev, 2, nvec, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, &desc) < 0) { --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -252,12 +252,18 @@ struct irq_affinity_notify { * @nr_sets: The number of interrupt sets for which affinity * spreading is required * @set_size: Array holding the size of each interrupt set + * @calc_sets: Callback for calculating the number and size + * of interrupt sets + * @priv: Private data for usage by @calc_sets, usually a + * pointer to driver/device specific data. */ struct irq_affinity { unsigned int pre_vectors; unsigned int post_vectors; unsigned int nr_sets; unsigned int set_size[IRQ_AFFINITY_MAX_SETS]; + void (*calc_sets)(struct irq_affinity *, unsigned int nvecs); + void *priv; }; /** @@ -317,7 +323,7 @@ extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); struct irq_affinity_desc * -irq_create_affinity_masks(unsigned int nvec, const struct irq_affinity *affd); +irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd); unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, const struct irq_affinity *affd); @@ -354,7 +360,7 @@ irq_set_affinity_notifier(unsigned int i } static inline struct irq_affinity_desc * -irq_create_affinity_masks(unsigned int nvec, const struct irq_affinity *affd) +irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd) { return NULL; } --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1393,7 +1393,7 @@ static inline int pci_enable_msix_exact( } int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, - const struct irq_affinity *affd); + struct irq_affinity *affd); void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); @@ -1419,7 +1419,7 @@ static inline int pci_enable_msix_exact( static inline int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, - const struct irq_affinity *aff_desc) + struct irq_affinity *aff_desc) { if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq) return 1; --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -230,6 +230,12 @@ static int irq_build_affinity_masks(cons return ret; } +static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs) +{ + affd->nr_sets = 1; + affd->set_size[0] = affvecs; +} + /** * irq_create_affinity_masks - Create affinity masks for multiqueue spreading * @nvecs: The total number of vectors @@ -240,8 +246,7 @@ static int irq_build_affinity_masks(cons struct irq_affinity_desc * irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) { - unsigned int affvecs, curvec, usedvecs, nr_sets, i; - unsigned int set_size[IRQ_AFFINITY_MAX_SETS]; + unsigned int affvecs, curvec, usedvecs, i; struct irq_affinity_desc *masks = NULL; /* @@ -251,6 +256,24 @@ irq_create_affinity_masks(unsigned int n if (nvecs == affd->pre_vectors + affd->post_vectors) return NULL; + /* + * Simple invocations do not provide a calc_sets() + * callback. Install the generic one. The check for affd->nr_sets + * is a temporary workaround and will be removed after the NVME + * driver is converted over. + */ + if (!affd->nr_sets && !affd->calc_sets) + affd->calc_sets = default_calc_sets; + + /* + * If the device driver provided a calc_sets() callback let it + * recalculate the number of sets and their size. The check will go + * away once the NVME driver is converted over. + */ + affvecs = nvecs - affd->pre_vectors - affd->post_vectors; + if (affd->calc_sets) + affd->calc_sets(affd, affvecs); + if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS)) return NULL; @@ -261,21 +284,13 @@ irq_create_affinity_masks(unsigned int n /* Fill out vectors at the beginning that don't need affinity */ for (curvec = 0; curvec < affd->pre_vectors; curvec++) cpumask_copy(&masks[curvec].mask, irq_default_affinity); + /* * Spread on present CPUs starting from affd->pre_vectors. If we * have multiple sets, build each sets affinity mask separately. */ - affvecs = nvecs - affd->pre_vectors - affd->post_vectors; - nr_sets = affd->nr_sets; - if (!nr_sets) { - nr_sets = 1; - set_size[0] = affvecs; - } else { - memcpy(set_size, affd->set_size, nr_sets * sizeof(unsigned int)); - } - - for (i = 0, usedvecs = 0; i < nr_sets; i++) { - unsigned int this_vecs = set_size[i]; + for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) { + unsigned int this_vecs = affd->set_size[i]; int ret; ret = irq_build_affinity_masks(affd, curvec, this_vecs, @@ -318,7 +333,9 @@ unsigned int irq_calc_affinity_vectors(u if (resv > minvec) return 0; - if (affd->nr_sets) { + if (affd->calc_sets) { + set_vecs = maxvec - resv; + } else if (affd->nr_sets) { unsigned int i; for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) From patchwork Thu Feb 14 20:47:59 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 1042432 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=linutronix.de Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 440qVB4nNcz9s5c for ; Fri, 15 Feb 2019 08:37:06 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2437899AbfBNVg5 (ORCPT ); Thu, 14 Feb 2019 16:36:57 -0500 Received: from Galois.linutronix.de ([146.0.238.70]:51367 "EHLO Galois.linutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2440296AbfBNVgz (ORCPT ); Thu, 14 Feb 2019 16:36:55 -0500 Received: from localhost ([127.0.0.1] helo=nanos.tec.linutronix.de) by Galois.linutronix.de with esmtp (Exim 4.80) (envelope-from ) id 1guOgE-0002CE-0s; Thu, 14 Feb 2019 22:36:26 +0100 Message-Id: <20190214211759.699390983@linutronix.de> User-Agent: quilt/0.65 Date: Thu, 14 Feb 2019 21:47:59 +0100 From: Thomas Gleixner To: LKML Cc: Ming Lei , Christoph Hellwig , Bjorn Helgaas , Jens Axboe , linux-block@vger.kernel.org, Sagi Grimberg , linux-nvme@lists.infradead.org, linux-pci@vger.kernel.org, Keith Busch , Marc Zyngier , Sumit Saxena , Kashyap Desai , Shivasharan Srikanteshwara Subject: [patch V5 4/8] nvme-pci: Simplify interrupt allocation References: <20190214204755.819014197@linutronix.de> MIME-Version: 1.0 Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org From: Ming Lei The NVME PCI driver contains a tedious mechanism for interrupt allocation, which is necessary to adjust the number and size of interrupt sets to the maximum available number of interrupts which depends on the underlying PCI capabilities and the available CPU resources. It works around the former short comings of the PCI and core interrupt allocation mechanims in combination with interrupt sets. The PCI interrupt allocation function allows to provide a maximum and a minimum number of interrupts to be allocated and tries to allocate as many as possible. This worked without driver interaction as long as there was only a single set of interrupts to handle. With the addition of support for multiple interrupt sets in the generic affinity spreading logic, which is invoked from the PCI interrupt allocation, the adaptive loop in the PCI interrupt allocation did not work for multiple interrupt sets. The reason is that depending on the total number of interrupts which the PCI allocation adaptive loop tries to allocate in each step, the number and the size of the interrupt sets need to be adapted as well. Due to the way the interrupt sets support was implemented there was no way for the PCI interrupt allocation code or the core affinity spreading mechanism to invoke a driver specific function for adapting the interrupt sets configuration. As a consequence the driver had to implement another adaptive loop around the PCI interrupt allocation function and calling that with maximum and minimum interrupts set to the same value. This ensured that the allocation either succeeded or immediately failed without any attempt to adjust the number of interrupts in the PCI code. The core code now allows drivers to provide a callback to recalculate the number and the size of interrupt sets during PCI interrupt allocation, which in turn allows the PCI interrupt allocation function to be called in the same way as with a single set of interrupts. The PCI code handles the adaptive loop and the interrupt affinity spreading mechanism invokes the driver callback to adapt the interrupt set configuration to the current loop value. This replaces the adaptive loop in the driver completely. Implement the NVME specific callback which adjusts the interrupt sets configuration and remove the adaptive allocation loop. [ tglx: Simplify the callback further and restore the dropped adjustment of number of sets ] Signed-off-by: Ming Lei Signed-off-by: Thomas Gleixner --- drivers/nvme/host/pci.c | 108 ++++++++++++------------------------------------ 1 file changed, 28 insertions(+), 80 deletions(-) --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2041,41 +2041,32 @@ static int nvme_setup_host_mem(struct nv return ret; } -/* irq_queues covers admin queue */ -static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues) +/* + * nirqs is the number of interrupts available for write and read + * queues. The core already reserved an interrupt for the admin queue. + */ +static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs) { - unsigned int this_w_queues = write_queues; - - WARN_ON(!irq_queues); - - /* - * Setup read/write queue split, assign admin queue one independent - * irq vector if irq_queues is > 1. - */ - if (irq_queues <= 2) { - dev->io_queues[HCTX_TYPE_DEFAULT] = 1; - dev->io_queues[HCTX_TYPE_READ] = 0; - return; - } + struct nvme_dev *dev = affd->priv; + unsigned int nr_read_queues; /* - * If 'write_queues' is set, ensure it leaves room for at least - * one read queue and one admin queue - */ - if (this_w_queues >= irq_queues) - this_w_queues = irq_queues - 2; - - /* - * If 'write_queues' is set to zero, reads and writes will share - * a queue set. - */ - if (!this_w_queues) { - dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues - 1; - dev->io_queues[HCTX_TYPE_READ] = 0; - } else { - dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues; - dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues - 1; - } + * If only one interrupt is available, combine write and read + * queues. If 'write_queues' is set, ensure it leaves room for at + * least one read queue. + */ + if (nrirqs == 1) + nr_read_queues = 0; + else if (write_queues >= nrirqs) + nr_read_queues = nrirqs - 1; + else + nr_read_queues = nrirqs - write_queues; + + dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues; + affd->set_size[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues; + dev->io_queues[HCTX_TYPE_READ] = nr_read_queues; + affd->set_size[HCTX_TYPE_READ] = nr_read_queues; + affd->nr_sets = nr_read_queues ? 2 : 1; } static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) @@ -2083,10 +2074,9 @@ static int nvme_setup_irqs(struct nvme_d struct pci_dev *pdev = to_pci_dev(dev->dev); struct irq_affinity affd = { .pre_vectors = 1, - .nr_sets = 2, + .calc_sets = nvme_calc_irq_sets, + .priv = dev, }; - unsigned int *irq_sets = affd.set_size; - int result = 0; unsigned int irq_queues, this_p_queues; /* @@ -2102,51 +2092,8 @@ static int nvme_setup_irqs(struct nvme_d } dev->io_queues[HCTX_TYPE_POLL] = this_p_queues; - /* - * For irq sets, we have to ask for minvec == maxvec. This passes - * any reduction back to us, so we can adjust our queue counts and - * IRQ vector needs. - */ - do { - nvme_calc_io_queues(dev, irq_queues); - irq_sets[0] = dev->io_queues[HCTX_TYPE_DEFAULT]; - irq_sets[1] = dev->io_queues[HCTX_TYPE_READ]; - if (!irq_sets[1]) - affd.nr_sets = 1; - - /* - * If we got a failure and we're down to asking for just - * 1 + 1 queues, just ask for a single vector. We'll share - * that between the single IO queue and the admin queue. - * Otherwise, we assign one independent vector to admin queue. - */ - if (irq_queues > 1) - irq_queues = irq_sets[0] + irq_sets[1] + 1; - - result = pci_alloc_irq_vectors_affinity(pdev, irq_queues, - irq_queues, - PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); - - /* - * Need to reduce our vec counts. If we get ENOSPC, the - * platform should support mulitple vecs, we just need - * to decrease our ask. If we get EINVAL, the platform - * likely does not. Back down to ask for just one vector. - */ - if (result == -ENOSPC) { - irq_queues--; - if (!irq_queues) - return result; - continue; - } else if (result == -EINVAL) { - irq_queues = 1; - continue; - } else if (result <= 0) - return -EIO; - break; - } while (1); - - return result; + return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues, + PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); } static void nvme_disable_io_queues(struct nvme_dev *dev) @@ -3021,6 +2968,7 @@ static struct pci_driver nvme_driver = { static int __init nvme_init(void) { + BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); return pci_register_driver(&nvme_driver); } From patchwork Thu Feb 14 20:48:00 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 1042436 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=linutronix.de Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 440qVT5C28z9s5c for ; Fri, 15 Feb 2019 08:37:21 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2440282AbfBNVgt (ORCPT ); Thu, 14 Feb 2019 16:36:49 -0500 Received: from Galois.linutronix.de ([146.0.238.70]:51353 "EHLO Galois.linutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2436452AbfBNVgq (ORCPT ); Thu, 14 Feb 2019 16:36:46 -0500 Received: from localhost ([127.0.0.1] helo=nanos.tec.linutronix.de) by Galois.linutronix.de with esmtp (Exim 4.80) (envelope-from ) id 1guOgF-0002CU-3N; Thu, 14 Feb 2019 22:36:27 +0100 Message-Id: <20190214211759.803557905@linutronix.de> User-Agent: quilt/0.65 Date: Thu, 14 Feb 2019 21:48:00 +0100 From: Thomas Gleixner To: LKML Cc: Ming Lei , Christoph Hellwig , Bjorn Helgaas , Jens Axboe , linux-block@vger.kernel.org, Sagi Grimberg , linux-nvme@lists.infradead.org, linux-pci@vger.kernel.org, Keith Busch , Marc Zyngier , Sumit Saxena , Kashyap Desai , Shivasharan Srikanteshwara Subject: [patch V5 5/8] genirq/affinity: Remove the leftovers of the original set support References: <20190214204755.819014197@linutronix.de> MIME-Version: 1.0 Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org Now that the NVME driver is converted over to the calc_set() callback, the workarounds of the original set support can be removed. Signed-off-by: Thomas Gleixner --- kernel/irq/affinity.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -258,21 +258,17 @@ irq_create_affinity_masks(unsigned int n /* * Simple invocations do not provide a calc_sets() - * callback. Install the generic one. The check for affd->nr_sets - * is a temporary workaround and will be removed after the NVME - * driver is converted over. + * callback. Install the generic one. */ - if (!affd->nr_sets && !affd->calc_sets) + if (!affd->calc_sets) affd->calc_sets = default_calc_sets; /* * If the device driver provided a calc_sets() callback let it - * recalculate the number of sets and their size. The check will go - * away once the NVME driver is converted over. + * recalculate the number of sets and their size. */ affvecs = nvecs - affd->pre_vectors - affd->post_vectors; - if (affd->calc_sets) - affd->calc_sets(affd, affvecs); + affd->calc_sets(affd, affvecs); if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS)) return NULL; @@ -335,11 +331,6 @@ unsigned int irq_calc_affinity_vectors(u if (affd->calc_sets) { set_vecs = maxvec - resv; - } else if (affd->nr_sets) { - unsigned int i; - - for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) - set_vecs += affd->set_size[i]; } else { get_online_cpus(); set_vecs = cpumask_weight(cpu_possible_mask); From patchwork Thu Feb 14 20:48:01 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 1042435 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=linutronix.de Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 440qVS1Y4Rz9s7h for ; Fri, 15 Feb 2019 08:37:20 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2440284AbfBNVgt (ORCPT ); Thu, 14 Feb 2019 16:36:49 -0500 Received: from Galois.linutronix.de ([146.0.238.70]:51354 "EHLO Galois.linutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2440277AbfBNVgt (ORCPT ); Thu, 14 Feb 2019 16:36:49 -0500 Received: from localhost ([127.0.0.1] helo=nanos.tec.linutronix.de) by Galois.linutronix.de with esmtp (Exim 4.80) (envelope-from ) id 1guOgG-0002CZ-1d; Thu, 14 Feb 2019 22:36:28 +0100 Message-Id: <20190214211759.893781190@linutronix.de> User-Agent: quilt/0.65 Date: Thu, 14 Feb 2019 21:48:01 +0100 From: Thomas Gleixner To: LKML Cc: Ming Lei , Christoph Hellwig , Bjorn Helgaas , Jens Axboe , linux-block@vger.kernel.org, Sagi Grimberg , linux-nvme@lists.infradead.org, linux-pci@vger.kernel.org, Keith Busch , Marc Zyngier , Sumit Saxena , Kashyap Desai , Shivasharan Srikanteshwara Subject: [patch V5 6/8] PCI/MSI: Remove obsolete sanity checks for multiple interrupt sets References: <20190214204755.819014197@linutronix.de> MIME-Version: 1.0 Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org Multiple interrupt sets for affinity spreading are now handled in the core code and the number of sets and their size is recalculated via a driver supplied callback. That avoids the requirement to invoke pci_alloc_irq_vectors_affinity() with the arguments minvecs and maxvecs set to the same value and the callsite handling the ENOSPC situation. Remove the now obsolete sanity checks and the related comments. Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 14 -------------- 1 file changed, 14 deletions(-) --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1035,13 +1035,6 @@ static int __pci_enable_msi_range(struct if (maxvec < minvec) return -ERANGE; - /* - * If the caller is passing in sets, we can't support a range of - * vectors. The caller needs to handle that. - */ - if (affd && affd->nr_sets && minvec != maxvec) - return -EINVAL; - if (WARN_ON_ONCE(dev->msi_enabled)) return -EINVAL; @@ -1093,13 +1086,6 @@ static int __pci_enable_msix_range(struc if (maxvec < minvec) return -ERANGE; - /* - * If the caller is passing in sets, we can't support a range of - * supported vectors. The caller needs to handle that. - */ - if (affd && affd->nr_sets && minvec != maxvec) - return -EINVAL; - if (WARN_ON_ONCE(dev->msix_enabled)) return -EINVAL; From patchwork Thu Feb 14 20:48:02 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 1042437 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=linutronix.de Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 440qVd1pHXz9s7h for ; Fri, 15 Feb 2019 08:37:29 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2388981AbfBNVhV (ORCPT ); Thu, 14 Feb 2019 16:37:21 -0500 Received: from Galois.linutronix.de ([146.0.238.70]:51355 "EHLO Galois.linutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2440278AbfBNVgt (ORCPT ); Thu, 14 Feb 2019 16:36:49 -0500 Received: from localhost ([127.0.0.1] helo=nanos.tec.linutronix.de) by Galois.linutronix.de with esmtp (Exim 4.80) (envelope-from ) id 1guOgG-0002Ce-Q7; Thu, 14 Feb 2019 22:36:29 +0100 Message-Id: <20190214211759.981965829@linutronix.de> User-Agent: quilt/0.65 Date: Thu, 14 Feb 2019 21:48:02 +0100 From: Thomas Gleixner To: LKML Cc: Ming Lei , Christoph Hellwig , Bjorn Helgaas , Jens Axboe , linux-block@vger.kernel.org, Sagi Grimberg , linux-nvme@lists.infradead.org, linux-pci@vger.kernel.org, Keith Busch , Marc Zyngier , Sumit Saxena , Kashyap Desai , Shivasharan Srikanteshwara Subject: [patch V5 7/8] genirq/affinity: Set is_managed in the spreading function References: <20190214204755.819014197@linutronix.de> MIME-Version: 1.0 Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org Some drivers need an extra set of interrupts which are not marked managed, but should get initial interrupt spreading. To achieve this it is simpler to set the is_managed bit of the affinity descriptor in the spreading function instead of having yet another loop and tons of conditionals. No functional change. Signed-off-by: Thomas Gleixner --- kernel/irq/affinity.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -98,6 +98,7 @@ static int __irq_build_affinity_masks(co unsigned int startvec, unsigned int numvecs, unsigned int firstvec, + bool managed, cpumask_var_t *node_to_cpumask, const struct cpumask *cpu_mask, struct cpumask *nmsk, @@ -154,6 +155,7 @@ static int __irq_build_affinity_masks(co } irq_spread_init_one(&masks[curvec].mask, nmsk, cpus_per_vec); + masks[curvec].is_managed = managed; } done += v; @@ -173,7 +175,7 @@ static int __irq_build_affinity_masks(co */ static int irq_build_affinity_masks(const struct irq_affinity *affd, unsigned int startvec, unsigned int numvecs, - unsigned int firstvec, + unsigned int firstvec, bool managed, struct irq_affinity_desc *masks) { unsigned int curvec = startvec, nr_present, nr_others; @@ -197,8 +199,8 @@ static int irq_build_affinity_masks(cons build_node_to_cpumask(node_to_cpumask); /* Spread on present CPUs starting from affd->pre_vectors */ - nr_present = __irq_build_affinity_masks(affd, curvec, numvecs, - firstvec, node_to_cpumask, + nr_present = __irq_build_affinity_masks(affd, curvec, numvecs, firstvec, + managed, node_to_cpumask, cpu_present_mask, nmsk, masks); /* @@ -212,8 +214,8 @@ static int irq_build_affinity_masks(cons else curvec = firstvec + nr_present; cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); - nr_others = __irq_build_affinity_masks(affd, curvec, numvecs, - firstvec, node_to_cpumask, + nr_others = __irq_build_affinity_masks(affd, curvec, numvecs, firstvec, + managed, node_to_cpumask, npresmsk, nmsk, masks); put_online_cpus(); @@ -290,7 +292,7 @@ irq_create_affinity_masks(unsigned int n int ret; ret = irq_build_affinity_masks(affd, curvec, this_vecs, - curvec, masks); + true, curvec, masks); if (ret) { kfree(masks); return NULL; @@ -307,10 +309,6 @@ irq_create_affinity_masks(unsigned int n for (; curvec < nvecs; curvec++) cpumask_copy(&masks[curvec].mask, irq_default_affinity); - /* Mark the managed interrupts */ - for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++) - masks[i].is_managed = 1; - return masks; } From patchwork Thu Feb 14 20:48:03 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Gleixner X-Patchwork-Id: 1042433 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=linutronix.de Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 440qVQ4Wq4z9s7h for ; Fri, 15 Feb 2019 08:37:18 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2437900AbfBNVhL (ORCPT ); Thu, 14 Feb 2019 16:37:11 -0500 Received: from Galois.linutronix.de ([146.0.238.70]:51360 "EHLO Galois.linutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2502507AbfBNVgw (ORCPT ); Thu, 14 Feb 2019 16:36:52 -0500 Received: from localhost ([127.0.0.1] helo=nanos.tec.linutronix.de) by Galois.linutronix.de with esmtp (Exim 4.80) (envelope-from ) id 1guOgI-0002Cv-I7; Thu, 14 Feb 2019 22:36:30 +0100 Message-Id: <20190214211800.077804233@linutronix.de> User-Agent: quilt/0.65 Date: Thu, 14 Feb 2019 21:48:03 +0100 From: Thomas Gleixner To: LKML Cc: Ming Lei , Christoph Hellwig , Bjorn Helgaas , Jens Axboe , linux-block@vger.kernel.org, Sagi Grimberg , linux-nvme@lists.infradead.org, linux-pci@vger.kernel.org, Keith Busch , Marc Zyngier , Sumit Saxena , Kashyap Desai , Shivasharan Srikanteshwara Subject: [patch V5 8/8] genirq/affinity: Add support for non-managed affinity sets References: <20190214204755.819014197@linutronix.de> MIME-Version: 1.0 Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org Some drivers need an extra set of interrupts which should not be marked managed, but should get initial interrupt spreading. Add a bitmap to struct irq_affinity which allows the driver to mark a particular set of interrupts as non managed. Check the bitmap during spreading and use the result to mark the interrupts in the sets accordingly. The unmanaged interrupts get initial spreading, but user space can change their affinity later on. For the managed sets, i.e. the corresponding bit in the mask is not set, there is no change in behaviour. Usage example: struct irq_affinity affd = { .pre_vectors = 2, .unmanaged_sets = 0x02, .calc_sets = drv_calc_sets, }; .... For both interrupt sets the interrupts are properly spread out, but the second set is not marked managed. Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 2 ++ kernel/irq/affinity.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -251,6 +251,7 @@ struct irq_affinity_notify { * the MSI(-X) vector space * @nr_sets: The number of interrupt sets for which affinity * spreading is required + * @unmanaged_sets: Bitmap to mark entries in the @set_size array unmanaged * @set_size: Array holding the size of each interrupt set * @calc_sets: Callback for calculating the number and size * of interrupt sets @@ -261,6 +262,7 @@ struct irq_affinity { unsigned int pre_vectors; unsigned int post_vectors; unsigned int nr_sets; + unsigned int unmanaged_sets; unsigned int set_size[IRQ_AFFINITY_MAX_SETS]; void (*calc_sets)(struct irq_affinity *, unsigned int nvecs); void *priv; --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -251,6 +251,8 @@ irq_create_affinity_masks(unsigned int n unsigned int affvecs, curvec, usedvecs, i; struct irq_affinity_desc *masks = NULL; + BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS > sizeof(affd->unmanaged_sets) * 8); + /* * If there aren't any vectors left after applying the pre/post * vectors don't bother with assigning affinity. @@ -288,11 +290,12 @@ irq_create_affinity_masks(unsigned int n * have multiple sets, build each sets affinity mask separately. */ for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) { + bool managed = affd->unmanaged_sets & (1U << i) ? true : false; unsigned int this_vecs = affd->set_size[i]; int ret; ret = irq_build_affinity_masks(affd, curvec, this_vecs, - true, curvec, masks); + managed, curvec, masks); if (ret) { kfree(masks); return NULL;