Patchwork [v5,1/2] PCI: designware: Fix crash in dw_msi_teardown_irq

login
register
mail settings
Submitter Bjørn Erik Nilsen
Date Nov. 29, 2013, 1:35 p.m.
Message ID <1385732125-28630-2-git-send-email-ben@datarespons.no>
Download mbox | patch
Permalink /patch/295335/
State Changes Requested
Headers show

Comments

Bjørn Erik Nilsen - Nov. 29, 2013, 1:35 p.m.
"PCI: designware: Add irq_create_mapping()" resulted
in pre-allocated irq descs. Problem was that in assign_irq()
these descs were explicitly allocated and hence also freed,
resulting in a crash. We also need to clear the entire irq
range in teardown. With this commit the teardown basically
does exactly the opposite of what was done in setup.

Signed-off-by: Bjørn Erik Nilsen <ben@datarespons.no>
---
 drivers/pci/host/pcie-designware.c | 49 ++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 12 deletions(-)
Marek Vasut - Nov. 29, 2013, 2:32 p.m.
Dear Bjørn Erik Nilsen,

I will be a nitpicking bastard below, sorry ;-/

> "PCI: designware: Add irq_create_mapping()" resulted
> in pre-allocated irq descs. Problem was that in assign_irq()
> these descs were explicitly allocated and hence also freed,
> resulting in a crash. We also need to clear the entire irq
> range in teardown. With this commit the teardown basically
> does exactly the opposite of what was done in setup.
> 
> Signed-off-by: Bjørn Erik Nilsen <ben@datarespons.no>

You can stuff your CC list here, like this, if it's more convenient for you then 
to type all these people on the command line:

Cc: Exa Mple <user@example.com>

The git send-email will automatically send email to this address.

> ---
>  drivers/pci/host/pcie-designware.c | 49
> ++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+),
> 12 deletions(-)
> 
> diff --git a/drivers/pci/host/pcie-designware.c
> b/drivers/pci/host/pcie-designware.c index e33b68b..61345a1 100644
> --- a/drivers/pci/host/pcie-designware.c
> +++ b/drivers/pci/host/pcie-designware.c
> @@ -209,6 +209,25 @@ static int find_valid_pos0(struct pcie_port *pp, int
> msgvec, int pos, int *pos0) return 0;
>  }
> 
> +static void clear_irq_range(struct pcie_port *pp, unsigned int irq_base,
> +                            unsigned int nvec, unsigned int pos)
> +{
> +	unsigned int i, res, bit, val;
> +
> +	i = 0;
> +	while (i < nvec) {

Won't simple

for (i = 0; i < nvec; i++) {

do here?

> +		irq_set_msi_desc_off(irq_base, i, NULL);

This one returns a return value, please handle it.
ret = irq_set(...);
if (ret)
 goto fail;


> +		clear_bit(pos + i, pp->msi_irq_in_use);
> +		/* Disable corresponding interrupt on MSI interrupt controller 
*/
> +		res = ((pos + i) / 32) * 12;
> +		bit = (pos + i) % 32;
> +		dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, &val);
> +		val &= ~(1 << bit);
> +		dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, val);
> +		++i;
> +	}

fail:
 for (--i; i >=0; i--)
   irq...
return ret;

> +}
> +
>  static int assign_irq(int no_irqs, struct msi_desc *desc, int *pos)
>  {
>  	int res, bit, irq, pos0, pos1, i;
> @@ -242,11 +261,20 @@ static int assign_irq(int no_irqs, struct msi_desc
> *desc, int *pos) if (!irq)
>  		goto no_valid_irq;
> 
> +	/*
> +	 * irq_create_mapping (called from dw_pcie_host_init) pre-allocates
> +	 * descs so there is no need to allocate descs here. We can therefore
> +	 * assume that if irq_find_mapping above returns non-zero, then the
> +	 * descs are also successfully allocated.
> +	 */
> +
>  	i = 0;
>  	while (i < no_irqs) {
> +		if (irq_set_msi_desc_off(irq, i, desc) != 0) {
> +			clear_irq_range(pp, irq, i, pos0);
> +			goto no_valid_irq;
> +		}
>  		set_bit(pos0 + i, pp->msi_irq_in_use);
> -		irq_alloc_descs((irq + i), (irq + i), 1, 0);
> -		irq_set_msi_desc(irq + i, desc);
>  		/*Enable corresponding interrupt in MSI interrupt controller */
>  		res = ((pos0 + i) / 32) * 12;
>  		bit = (pos0 + i) % 32;
> @@ -266,7 +294,7 @@ no_valid_irq:
> 
>  static void clear_irq(unsigned int irq)
>  {
> -	int res, bit, val, pos;
> +	unsigned int pos, nvec;
>  	struct irq_desc *desc;
>  	struct msi_desc *msi;
>  	struct pcie_port *pp;
> @@ -281,18 +309,15 @@ static void clear_irq(unsigned int irq)
>  		return;
>  	}
> 
> +	/* undo what was done in assign_irq */
>  	pos = data->hwirq;
> +	nvec = 1 << msi->msi_attrib.multiple;
> 
> -	irq_free_desc(irq);
> -
> -	clear_bit(pos, pp->msi_irq_in_use);
> +	clear_irq_range(pp, irq, nvec, pos);
> 
> -	/* Disable corresponding interrupt on MSI interrupt controller */
> -	res = (pos / 32) * 12;
> -	bit = pos % 32;
> -	dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, &val);
> -	val &= ~(1 << bit);
> -	dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, val);
> +	/* all irqs cleared; reset attributes */
> +	msi->irq = 0;
> +	msi->msi_attrib.multiple = 0;
>  }
> 
>  static int dw_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marek Vasut - Nov. 29, 2013, 3:36 p.m.
Dear Bjørn Erik Nilsen,
[...]
> > Won't simple
> > 
> > for (i = 0; i < nvec; i++) {
> > 
> > do here?
> 
> Yes.
> 
> The very same syntax ('while i < nvec') is used in assign_irq. That is
> why I wanted to keep it like that to avoid adding too much confusion, or
> at least make it easy to recognize the same pattern.
> 
> You still want me to change it?

Your loop does exactly what a for() loop would do, there's no need to emulate it 
with a while() loop. If you can fix the other one, fix the other one as well 
please.

> As for the other nitpick, I don't agree with you.
> 
> In fact, dw_msi_teardown_irq has no return value itself. Moreover, if
> setting the msi desc to NULL fails, then it simply means there is no irq
> desc and there is nothing to unwind. Also, skipping the other unwind
> operations just because that single operation failed would leave the
> driver in a much worse state. At least in my opinion.
> 
> What's your opinion on that?

I see this:

$ git grep irq_set_msi_desc_off include/
include/linux/irq.h:extern int irq_set_msi_desc_off(unsigned int irq_base, 
unsigned int irq_offset,

So it has a return value which needs to be handled. Sorry if I wasn't clear on 
the first try.

> Now, if you want me to make another patch do you prefer a standalone
> patch on top of the other patches, or a completely new patchset?

Let's keep iterating :) But please wait for more feedback first.

Best regards,
Marek Vasut
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marek Vasut - Nov. 29, 2013, 5:02 p.m.
Dear Bjørn Erik Nilsen,

> Dear Marek Vasut,
> 
> On Fri, 2013-11-29 at 16:36 +0100, Marek Vasut wrote:
> > Dear Bjørn Erik Nilsen,
> > [...]
> > 
> > > > Won't simple
> > > > 
> > > > for (i = 0; i < nvec; i++) {
> > > > 
> > > > do here?
> > > 
> > > Yes.
> > > 
> > > The very same syntax ('while i < nvec') is used in assign_irq. That is
> > > why I wanted to keep it like that to avoid adding too much confusion,
> > > or at least make it easy to recognize the same pattern.
> > > 
> > > You still want me to change it?
> > 
> > Your loop does exactly what a for() loop would do, there's no need to
> > emulate it with a while() loop. If you can fix the other one, fix the
> > other one as well please.
> 
> Yes, it is syntax sugar indeed and I did it purely for readability as
> explained. You don't seem to agree with me on the readability argument,
> so I can make the new code use a for-loop. And then in an unrelated
> commit (which would look awfully silly) I can change the other
> while-loop too.

Either way is fine with me, I'm just more inclined to the for loop ;-) Let's 
wait for what the others have to say, they can judge this argument.

> > > As for the other nitpick, I don't agree with you.
> > > 
> > > In fact, dw_msi_teardown_irq has no return value itself. Moreover, if
> > > setting the msi desc to NULL fails, then it simply means there is no
> > > irq desc and there is nothing to unwind. Also, skipping the other
> > > unwind operations just because that single operation failed would
> > > leave the driver in a much worse state. At least in my opinion.
> > > 
> > > What's your opinion on that?
> > 
> > I see this:
> > 
> > $ git grep irq_set_msi_desc_off include/
> > include/linux/irq.h:extern int irq_set_msi_desc_off(unsigned int
> > irq_base, unsigned int irq_offset,
> > 
> > So it has a return value which needs to be handled. Sorry if I wasn't
> > clear on the first try.
> 
> I agree that return values should be checked.
> 
> However based on my reasoning there is absolutely nothing to do with the
> return value in this particular case, and in fact bailing out will leave
> the driver in a much worse state (as other unwind operations will be
> skipped).
> 
> That is what I kindly asked you to comment on.

How come you cannot unwind what you did thus far and bail out ? That means the 
design here is seriously broken, don't you think ?

> > > Now, if you want me to make another patch do you prefer a standalone
> > > patch on top of the other patches, or a completely new patchset?
> > 
> > Let's keep iterating :) But please wait for more feedback first.
> 
> OK. I will wait for more feedback then.

ACK, thanks ;-)

Best regards,
Marek Vasut
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marek Vasut - Dec. 2, 2013, 8:10 a.m.
Dear Bjørn Erik Nilsen,

> 29. nov. 2013 kl. 18:02 skrev Marek Vasut <marex@denx.de>:
> > Dear Bjørn Erik Nilsen,
> > 
> >> Dear Marek Vasut,
> >> 
> >> On Fri, 2013-11-29 at 16:36 +0100, Marek Vasut wrote:
> >>> Dear Bjørn Erik Nilsen,
> >>> [...]
> >>> 
> >>>>> Won't simple
> >>>>> 
> >>>>> for (i = 0; i < nvec; i++) {
> >>>>> 
> >>>>> do here?
> >>>> 
> >>>> Yes.
> >>>> 
> >>>> The very same syntax ('while i < nvec') is used in assign_irq. That is
> >>>> why I wanted to keep it like that to avoid adding too much confusion,
> >>>> or at least make it easy to recognize the same pattern.
> >>>> 
> >>>> You still want me to change it?
> >>> 
> >>> Your loop does exactly what a for() loop would do, there's no need to
> >>> emulate it with a while() loop. If you can fix the other one, fix the
> >>> other one as well please.
> >> 
> >> Yes, it is syntax sugar indeed and I did it purely for readability as
> >> explained. You don't seem to agree with me on the readability argument,
> >> so I can make the new code use a for-loop. And then in an unrelated
> >> commit (which would look awfully silly) I can change the other
> >> while-loop too.
> > 
> > Either way is fine with me, I'm just more inclined to the for loop ;-)
> > Let's wait for what the others have to say, they can judge this
> > argument.
> > 
> >>>> As for the other nitpick, I don't agree with you.
> >>>> 
> >>>> In fact, dw_msi_teardown_irq has no return value itself. Moreover, if
> >>>> setting the msi desc to NULL fails, then it simply means there is no
> >>>> irq desc and there is nothing to unwind. Also, skipping the other
> >>>> unwind operations just because that single operation failed would
> >>>> leave the driver in a much worse state. At least in my opinion.
> >>>> 
> >>>> What's your opinion on that?
> >>> 
> >>> I see this:
> >>> 
> >>> $ git grep irq_set_msi_desc_off include/
> >>> include/linux/irq.h:extern int irq_set_msi_desc_off(unsigned int
> >>> irq_base, unsigned int irq_offset,
> >>> 
> >>> So it has a return value which needs to be handled. Sorry if I wasn't
> >>> clear on the first try.
> >> 
> >> I agree that return values should be checked.
> >> 
> >> However based on my reasoning there is absolutely nothing to do with the
> >> return value in this particular case, and in fact bailing out will leave
> >> the driver in a much worse state (as other unwind operations will be
> >> skipped).
> >> 
> >> That is what I kindly asked you to comment on.
> > 
> > How come you cannot unwind what you did thus far and bail out ? That
> > means the design here is seriously broken, don't you think ?
> 
> Maybe you need to take a closer look at the patch because the code in
> question is *the* unwinding code. So bailing out from there  doesn't make
> much sense.
> 
> Also note that I do check the return value of the same function call in
> setup, but the one you are complaining is for unwinding stuff that has
> already been setup.
> 
> I hope this helps :-)

Please consider me stupid and blind, sorry. I think maybe a WARN_ON(ret) might 
be a good sign things have gone severely awry here at least. What do you think?

Best regards,
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
index e33b68b..61345a1 100644
--- a/drivers/pci/host/pcie-designware.c
+++ b/drivers/pci/host/pcie-designware.c
@@ -209,6 +209,25 @@  static int find_valid_pos0(struct pcie_port *pp, int msgvec, int pos, int *pos0)
 	return 0;
 }
 
+static void clear_irq_range(struct pcie_port *pp, unsigned int irq_base,
+                            unsigned int nvec, unsigned int pos)
+{
+	unsigned int i, res, bit, val;
+
+	i = 0;
+	while (i < nvec) {
+		irq_set_msi_desc_off(irq_base, i, NULL);
+		clear_bit(pos + i, pp->msi_irq_in_use);
+		/* Disable corresponding interrupt on MSI interrupt controller */
+		res = ((pos + i) / 32) * 12;
+		bit = (pos + i) % 32;
+		dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, &val);
+		val &= ~(1 << bit);
+		dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, val);
+		++i;
+	}
+}
+
 static int assign_irq(int no_irqs, struct msi_desc *desc, int *pos)
 {
 	int res, bit, irq, pos0, pos1, i;
@@ -242,11 +261,20 @@  static int assign_irq(int no_irqs, struct msi_desc *desc, int *pos)
 	if (!irq)
 		goto no_valid_irq;
 
+	/*
+	 * irq_create_mapping (called from dw_pcie_host_init) pre-allocates
+	 * descs so there is no need to allocate descs here. We can therefore
+	 * assume that if irq_find_mapping above returns non-zero, then the
+	 * descs are also successfully allocated.
+	 */
+
 	i = 0;
 	while (i < no_irqs) {
+		if (irq_set_msi_desc_off(irq, i, desc) != 0) {
+			clear_irq_range(pp, irq, i, pos0);
+			goto no_valid_irq;
+		}
 		set_bit(pos0 + i, pp->msi_irq_in_use);
-		irq_alloc_descs((irq + i), (irq + i), 1, 0);
-		irq_set_msi_desc(irq + i, desc);
 		/*Enable corresponding interrupt in MSI interrupt controller */
 		res = ((pos0 + i) / 32) * 12;
 		bit = (pos0 + i) % 32;
@@ -266,7 +294,7 @@  no_valid_irq:
 
 static void clear_irq(unsigned int irq)
 {
-	int res, bit, val, pos;
+	unsigned int pos, nvec;
 	struct irq_desc *desc;
 	struct msi_desc *msi;
 	struct pcie_port *pp;
@@ -281,18 +309,15 @@  static void clear_irq(unsigned int irq)
 		return;
 	}
 
+	/* undo what was done in assign_irq */
 	pos = data->hwirq;
+	nvec = 1 << msi->msi_attrib.multiple;
 
-	irq_free_desc(irq);
-
-	clear_bit(pos, pp->msi_irq_in_use);
+	clear_irq_range(pp, irq, nvec, pos);
 
-	/* Disable corresponding interrupt on MSI interrupt controller */
-	res = (pos / 32) * 12;
-	bit = pos % 32;
-	dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, &val);
-	val &= ~(1 << bit);
-	dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, val);
+	/* all irqs cleared; reset attributes */
+	msi->irq = 0;
+	msi->msi_attrib.multiple = 0;
 }
 
 static int dw_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,