[SRU,bionic:linux-azure-4.15,1/1] UBUNTU: SAUCE: PCI: hv: Only reuse existing IRTE allocation for Multi-MSI

Message ID 20221115170031.13179-2-john.cabaj@canonical.com
State New
Series UBUNTU: SAUCE: Call trace during nvme storage performance test in bionic/linux-azure 4.15.0-1154

Commit Message

John Cabaj Nov. 15, 2022, 5 p.m. UTC
BugLink: https://bugs.launchpad.net/bugs/1996093

Fixes: b4b77778ecc5 ("PCI: hv: Reuse existing IRTE allocation in compose_msi_msg()")

Signed-off-by: John Cabaj <john.cabaj@canonical.com>
---
 drivers/pci/host/pci-hyperv.c | 68 +++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 15 deletions(-)

Comments

Marcelo Henrique Cerri Nov. 16, 2022, 2:01 p.m. UTC | #1

On Tue, Nov 15 2022, John Cabaj wrote:
> BugLink: https://bugs.launchpad.net/bugs/1996093
>
> Fixes: b4b77778ecc5 ("PCI: hv: Reuse existing IRTE allocation in compose_msi_msg()")
>
> Signed-off-by: John Cabaj <john.cabaj@canonical.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 68 +++++++++++++++++++++++++++--------
>  1 file changed, 53 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
> index 07100e7d87de..6465ac8d4193 100644
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -1101,7 +1101,7 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp,
>  }
>
>  static u32 hv_compose_msi_req_v1(
> -	struct pci_create_interrupt *int_pkt, struct cpumask *affinity,
> +	struct pci_create_interrupt *int_pkt,
>  	u32 slot, u8 vector, u8 vector_count)
>  {
>  	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
> @@ -1119,23 +1119,48 @@ static u32 hv_compose_msi_req_v1(
>  	return sizeof(*int_pkt);
>  }
>
> -static u32 hv_compose_msi_req_v2(
> -	struct pci_create_interrupt2 *int_pkt, struct cpumask *affinity,
> -	u32 slot, u8 vector, u8 vector_count)
> +/*
> + * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
> + * by subsequent retarget in hv_irq_unmask().
> + */
> +static int hv_compose_msi_req_get_cpu(struct cpumask *affinity)
> +{
> +	return cpumask_first_and(affinity, cpu_online_mask);
> +}
> +
> +/*
> + * Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0.
> + */
> +static int hv_compose_multi_msi_req_get_cpu(void)
>  {
> +	static DEFINE_SPINLOCK(multi_msi_cpu_lock);
> +
> +	/* -1 means starting with CPU 0 */
> +	static int cpu_next = -1;
> +
> +	unsigned long flags;
>  	int cpu;
>
> +	spin_lock_irqsave(&multi_msi_cpu_lock, flags);
> +
> +	cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids,
> +				     false);
> +	cpu = cpu_next;
> +
> +	spin_unlock_irqrestore(&multi_msi_cpu_lock, flags);
> +
> +	return cpu;
> +}
> +
> +static u32 hv_compose_msi_req_v2(
> +	struct pci_create_interrupt2 *int_pkt, int cpu,
> +	u32 slot, u8 vector, u8 vector_count)
> +{
>  	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2;
>  	int_pkt->wslot.slot = slot;
>  	int_pkt->int_desc.vector = vector;
>  	int_pkt->int_desc.vector_count = vector_count;
>  	int_pkt->int_desc.delivery_mode = dest_Fixed;
> -
> -	/*
> -	 * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
> -	 * by subsequent retarget in hv_irq_unmask().
> -	 */
> -	cpu = cpumask_first_and(affinity, cpu_online_mask);
>  	int_pkt->int_desc.processor_array[0] =
>  		hv_cpu_number_to_vp_number(cpu);
>  	int_pkt->int_desc.processor_count = 1;
> @@ -1174,11 +1199,17 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>  		} int_pkts;
>  	} __packed ctxt;
>
> +	bool multi_msi;
>  	u32 size;
>  	int ret;
> +	int cpu;
> +
> +	msi_desc  = irq_data_get_msi_desc(data);
> +	multi_msi = !msi_desc->msi_attrib.is_msix &&
> +		    msi_desc->nvec_used > 1;
>
>  	/* Reuse the previous allocation */
> -	if (data->chip_data) {
> +	if (data->chip_data && multi_msi) {
>  		int_desc = data->chip_data;
>  		msg->address_hi = int_desc->address >> 32;
>  		msg->address_lo = int_desc->address & 0xffffffff;
> @@ -1186,7 +1217,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>  		return;
>  	}
>
> -	msi_desc = irq_data_get_msi_desc(data);
>  	pdev = msi_desc_to_pci_dev(msi_desc);
>  	dest = irq_data_get_effective_affinity_mask(data);
>  	pbus = pdev->bus;
> @@ -1194,12 +1224,19 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>  	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
>  	if (!hpdev)
>  		goto return_null_message;
> +
> +	/* Free any previous message that might have already been composed. */
> +	if (data->chip_data && !multi_msi) {
> +		int_desc = data->chip_data;
> +		data->chip_data = NULL;
> +		hv_int_desc_free(hpdev, int_desc);
> +	}
>
>  	int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC);
>  	if (!int_desc)
>  		goto drop_reference;
>
> -	if (!msi_desc->msi_attrib.is_msix && msi_desc->nvec_used > 1) {
> +	if (multi_msi) {
>  		/*
>  		 * If this is not the first MSI of Multi MSI, we already have
>  		 * a mapping.  Can exit early.
> @@ -1224,9 +1261,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>  		 */
>  		vector = 32;
>  		vector_count = msi_desc->nvec_used;
> +		cpu = hv_compose_multi_msi_req_get_cpu();
>  	} else {
>  		vector = hv_msi_get_int_vector(data);
>  		vector_count = 1;
> +		cpu = hv_compose_msi_req_get_cpu(dest);
>  	}
>
>  	memset(&ctxt, 0, sizeof(ctxt));
> @@ -1237,7 +1276,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>  	switch (pci_protocol_version) {
>  	case PCI_PROTOCOL_VERSION_1_1:
>  		size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
> -					dest,
>  					hpdev->desc.win_slot.slot,
>  					vector,
>  					vector_count);
> @@ -1246,7 +1284,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>  	case PCI_PROTOCOL_VERSION_1_2:
>  	case PCI_PROTOCOL_VERSION_1_3:
>  		size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
> -					dest,
> +					cpu,
>  					hpdev->desc.win_slot.slot,
>  					vector,
>  					vector_count);
> --
> 2.34.1

The patch looks good to me, and it has good test results.

However, the patch is very similar to the upstream one, with some changes
to accommodate the fact that hv_compose_msi_req_get_cpu() is missing in
the 4.15 kernel. In this case, I think it makes more sense to keep the
original author and commit message and to document the changes we had to
include.

Since Tim is applying this patch, I believe he can fix that when applying
it.

Acked-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>

--
Regards,
Marcelo
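
To illustrate the behaviour being backported, below is a minimal standalone
C model of the round-robin dummy-vCPU selection that the patch introduces
as hv_compose_multi_msi_req_get_cpu(). It is a userspace sketch, not kernel
code: the helper names, NR_CPUS and the simulated online mask are invented
for the example; only the "static cursor plus wrap around the online CPUs"
behaviour mirrors the patch.

/*
 * Userspace model of the round-robin dummy-vCPU selection added by this
 * patch as hv_compose_multi_msi_req_get_cpu().  The kernel version walks
 * cpu_online_mask with cpumask_next_wrap() under multi_msi_cpu_lock; here
 * a fixed bitmask stands in for the mask and, being single-threaded, the
 * model needs no locking.
 */
#include <stdio.h>

#define NR_CPUS 8

/* Simulated cpu_online_mask: CPUs 0-3, 6 and 7 online; 4 and 5 offline. */
static const unsigned int online_mask = 0xcf;

/* Next online CPU after 'prev', wrapping around; prev == -1 starts at 0. */
static int next_online_cpu_wrap(int prev)
{
	int i;

	for (i = 1; i <= NR_CPUS; i++) {
		int cpu = (prev + i) % NR_CPUS;

		if (online_mask & (1u << cpu))
			return cpu;
	}
	return -1;	/* no CPU online; cannot happen in this model */
}

/* Model of hv_compose_multi_msi_req_get_cpu(): static cursor, round robin. */
static int multi_msi_get_cpu(void)
{
	static int cpu_next = -1;	/* -1 means starting with CPU 0 */

	cpu_next = next_online_cpu_wrap(cpu_next);
	return cpu_next;
}

int main(void)
{
	int i;

	/* Eight Multi-MSI compositions: dummy targets cycle 0,1,2,3,6,7,0,1 */
	for (i = 0; i < 8; i++)
		printf("request %d -> dummy vCPU %d\n", i, multi_msi_get_cpu());

	return 0;
}

Running it prints the dummy vCPU cycling through the online CPUs
(0, 1, 2, 3, 6, 7, 0, 1). In the kernel the same effect comes from
cpumask_next_wrap() on cpu_online_mask, serialised by multi_msi_cpu_lock,
so that successive Multi-MSI requests no longer all point their dummy
interrupt target at vCPU0 before hv_irq_unmask() retargets the interrupt.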

Patch

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 07100e7d87de..6465ac8d4193 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -1101,7 +1101,7 @@  static void hv_pci_compose_compl(void *context, struct pci_response *resp,
 }
 
 static u32 hv_compose_msi_req_v1(
-	struct pci_create_interrupt *int_pkt, struct cpumask *affinity,
+	struct pci_create_interrupt *int_pkt,
 	u32 slot, u8 vector, u8 vector_count)
 {
 	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
@@ -1119,23 +1119,48 @@  static u32 hv_compose_msi_req_v1(
 	return sizeof(*int_pkt);
 }
 
-static u32 hv_compose_msi_req_v2(
-	struct pci_create_interrupt2 *int_pkt, struct cpumask *affinity,
-	u32 slot, u8 vector, u8 vector_count)
+/*
+ * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
+ * by subsequent retarget in hv_irq_unmask().
+ */
+static int hv_compose_msi_req_get_cpu(struct cpumask *affinity)
+{
+	return cpumask_first_and(affinity, cpu_online_mask);
+}
+
+/*
+ * Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0.
+ */
+static int hv_compose_multi_msi_req_get_cpu(void)
 {
+	static DEFINE_SPINLOCK(multi_msi_cpu_lock);
+
+	/* -1 means starting with CPU 0 */
+	static int cpu_next = -1;
+
+	unsigned long flags;
 	int cpu;
 
+	spin_lock_irqsave(&multi_msi_cpu_lock, flags);
+
+	cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids,
+				     false);
+	cpu = cpu_next;
+
+	spin_unlock_irqrestore(&multi_msi_cpu_lock, flags);
+
+	return cpu;
+}
+
+static u32 hv_compose_msi_req_v2(
+	struct pci_create_interrupt2 *int_pkt, int cpu,
+	u32 slot, u8 vector, u8 vector_count)
+{
 	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2;
 	int_pkt->wslot.slot = slot;
 	int_pkt->int_desc.vector = vector;
 	int_pkt->int_desc.vector_count = vector_count;
 	int_pkt->int_desc.delivery_mode = dest_Fixed;
-
-	/*
-	 * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
-	 * by subsequent retarget in hv_irq_unmask().
-	 */
-	cpu = cpumask_first_and(affinity, cpu_online_mask);
 	int_pkt->int_desc.processor_array[0] =
 		hv_cpu_number_to_vp_number(cpu);
 	int_pkt->int_desc.processor_count = 1;
@@ -1174,11 +1199,17 @@  static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 		} int_pkts;
 	} __packed ctxt;
 
+	bool multi_msi;
 	u32 size;
 	int ret;
+	int cpu;
+
+	msi_desc  = irq_data_get_msi_desc(data);
+	multi_msi = !msi_desc->msi_attrib.is_msix &&
+		    msi_desc->nvec_used > 1;
 
 	/* Reuse the previous allocation */
-	if (data->chip_data) {
+	if (data->chip_data && multi_msi) {
 		int_desc = data->chip_data;
 		msg->address_hi = int_desc->address >> 32;
 		msg->address_lo = int_desc->address & 0xffffffff;
@@ -1186,7 +1217,6 @@  static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 		return;
 	}
 
-	msi_desc = irq_data_get_msi_desc(data);
 	pdev = msi_desc_to_pci_dev(msi_desc);
 	dest = irq_data_get_effective_affinity_mask(data);
 	pbus = pdev->bus;
@@ -1194,12 +1224,19 @@  static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
 	if (!hpdev)
 		goto return_null_message;
+ 
+	/* Free any previous message that might have already been composed. */
+	if (data->chip_data && !multi_msi) {
+		int_desc = data->chip_data;
+		data->chip_data = NULL;
+		hv_int_desc_free(hpdev, int_desc);
+	}
 
 	int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC);
 	if (!int_desc)
 		goto drop_reference;
 
-	if (!msi_desc->msi_attrib.is_msix && msi_desc->nvec_used > 1) {
+	if (multi_msi) {
 		/*
 		 * If this is not the first MSI of Multi MSI, we already have
 		 * a mapping.  Can exit early.
@@ -1224,9 +1261,11 @@  static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 		 */
 		vector = 32;
 		vector_count = msi_desc->nvec_used;
+		cpu = hv_compose_multi_msi_req_get_cpu();
 	} else {
 		vector = hv_msi_get_int_vector(data);
 		vector_count = 1;
+		cpu = hv_compose_msi_req_get_cpu(dest);
 	}
 
 	memset(&ctxt, 0, sizeof(ctxt));
@@ -1237,7 +1276,6 @@  static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 	switch (pci_protocol_version) {
 	case PCI_PROTOCOL_VERSION_1_1:
 		size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
-					dest,
 					hpdev->desc.win_slot.slot,
 					vector,
 					vector_count);
@@ -1246,7 +1284,7 @@  static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 	case PCI_PROTOCOL_VERSION_1_2:
 	case PCI_PROTOCOL_VERSION_1_3:
 		size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
-					dest,
+					cpu,
 					hpdev->desc.win_slot.slot,
 					vector,
 					vector_count);