[kernel,v3] powerpc/xive: Drop deregistered irqs
diff mbox series

Message ID 20190717050028.85926-1-aik@ozlabs.ru
State Superseded
Headers show
Series
  • [kernel,v3] powerpc/xive: Drop deregistered irqs
Related show

Checks

Context Check Description
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 55 lines checked
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/apply_patch success Successfully applied on branch next (f5c20693d8edcd665f1159dc941b9e7f87c17647)

Commit Message

Alexey Kardashevskiy July 17, 2019, 5 a.m. UTC
There is a race between releasing an irq on one cpu and fetching it
from XIVE on another cpu as there does not seem to be any locking between
these, probably because xive_irq_chip::irq_shutdown() is supposed to
remove the irq from all queues in the system which it does not do.

As a result, when such released irq appears in a queue, we take it
from the queue but we do not change the current priority on that cpu and
since there is no handler for the irq, EOI is never called and the cpu
current priority remains elevated (7 vs. 0xff==unmasked). If another irq
is assigned to the same cpu, then that device stops working until irq
is moved to another cpu or the device is reset.

This adds a new ppc_md.orphan_irq callback which is called if no irq
descriptor is found. The XIVE implementation drops the current priority
to 0xff which effectively unmasks interrupts in a current CPU.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
---
Changes:
v3:
* added a comment above xive_orphan_irq()

v2:
* added ppc_md.orphan_irq
---
 arch/powerpc/include/asm/machdep.h |  3 +++
 arch/powerpc/kernel/irq.c          |  9 ++++++---
 arch/powerpc/sysdev/xive/common.c  | 18 ++++++++++++++++++
 3 files changed, 27 insertions(+), 3 deletions(-)

Comments

Benjamin Herrenschmidt July 17, 2019, 5:04 a.m. UTC | #1
On Wed, 2019-07-17 at 15:00 +1000, Alexey Kardashevskiy wrote:
> There is a race between releasing an irq on one cpu and fetching it
> from XIVE on another cpu as there does not seem to be any locking between
> these, probably because xive_irq_chip::irq_shutdown() is supposed to
> remove the irq from all queues in the system which it does not do.
> 
> As a result, when such released irq appears in a queue, we take it
> from the queue but we do not change the current priority on that cpu and
> since there is no handler for the irq, EOI is never called and the cpu
> current priority remains elevated (7 vs. 0xff==unmasked). If another irq
> is assigned to the same cpu, then that device stops working until irq
> is moved to another cpu or the device is reset.
> 
> This adds a new ppc_md.orphan_irq callback which is called if no irq
> descriptor is found. The XIVE implementation drops the current priority
> to 0xff which effectively unmasks interrupts in a current CPU.

Better.

Now, you should proably add orphan_irq as a separate patch, and it
wouldn't hurt to make other PICs like XICS also provide it :-) They are
less likely to hit due to the absence of queuing but I suppose the
theorical race exists.

Cheers,
Ben.

> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> Reviewed-by: Cédric Le Goater <clg@kaod.org>
> ---
> Changes:
> v3:
> * added a comment above xive_orphan_irq()
> 
> v2:
> * added ppc_md.orphan_irq
> ---
>  arch/powerpc/include/asm/machdep.h |  3 +++
>  arch/powerpc/kernel/irq.c          |  9 ++++++---
>  arch/powerpc/sysdev/xive/common.c  | 18 ++++++++++++++++++
>  3 files changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
> index c43d6eca9edd..6cc14e28e89a 100644
> --- a/arch/powerpc/include/asm/machdep.h
> +++ b/arch/powerpc/include/asm/machdep.h
> @@ -59,6 +59,9 @@ struct machdep_calls {
>  	/* Return an irq, or 0 to indicate there are none pending. */
>  	unsigned int	(*get_irq)(void);
>  
> +	/* Drops irq if it does not have a valid descriptor */
> +	void		(*orphan_irq)(unsigned int irq);
> +
>  	/* PCI stuff */
>  	/* Called after allocating resources */
>  	void		(*pcibios_fixup)(void);
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index bc68c53af67c..b4e06d05bdba 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -632,10 +632,13 @@ void __do_irq(struct pt_regs *regs)
>  	may_hard_irq_enable();
>  
>  	/* And finally process it */
> -	if (unlikely(!irq))
> +	if (unlikely(!irq)) {
>  		__this_cpu_inc(irq_stat.spurious_irqs);
> -	else
> -		generic_handle_irq(irq);
> +	} else if (generic_handle_irq(irq)) {
> +		if (ppc_md.orphan_irq)
> +			ppc_md.orphan_irq(irq);
> +		__this_cpu_inc(irq_stat.spurious_irqs);
> +	}
>  
>  	trace_irq_exit(regs);
>  
> diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
> index 082c7e1c20f0..17e696b2d71b 100644
> --- a/arch/powerpc/sysdev/xive/common.c
> +++ b/arch/powerpc/sysdev/xive/common.c
> @@ -283,6 +283,23 @@ static unsigned int xive_get_irq(void)
>  	return irq;
>  }
>  
> +/*
> + * Handles the case when a target CPU catches an interrupt which is being shut
> + * down on another CPU. generic_handle_irq() returns an error in such case
> + * and then the orphan_irq() handler restores the CPPR to reenable interrupts.
> + *
> + * Without orphan_irq() and valid irq_desc, there is no other way to restore
> + * the CPPR. This executes on a CPU which caught the interrupt.
> + */
> +static void xive_orphan_irq(unsigned int irq)
> +{
> +	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
> +
> +	xc->cppr = 0xff;
> +	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
> +	DBG_VERBOSE("orphan_irq: irq %d, adjusting CPPR to 0xff\n", irq);
> +}
> +
>  /*
>   * After EOI'ing an interrupt, we need to re-check the queue
>   * to see if another interrupt is pending since multiple
> @@ -1419,6 +1436,7 @@ bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 o
>  	xive_irq_priority = max_prio;
>  
>  	ppc_md.get_irq = xive_get_irq;
> +	ppc_md.orphan_irq = xive_orphan_irq;
>  	__xive_enabled = true;
>  
>  	pr_devel("Initializing host..\n");
Alexey Kardashevskiy July 17, 2019, 6:06 a.m. UTC | #2
On 17/07/2019 15:00, Alexey Kardashevskiy wrote:
> There is a race between releasing an irq on one cpu and fetching it
> from XIVE on another cpu as there does not seem to be any locking between
> these, probably because xive_irq_chip::irq_shutdown() is supposed to
> remove the irq from all queues in the system which it does not do.
> 
> As a result, when such released irq appears in a queue, we take it
> from the queue but we do not change the current priority on that cpu and
> since there is no handler for the irq, EOI is never called and the cpu
> current priority remains elevated (7 vs. 0xff==unmasked). If another irq
> is assigned to the same cpu, then that device stops working until irq
> is moved to another cpu or the device is reset.
> 
> This adds a new ppc_md.orphan_irq callback which is called if no irq
> descriptor is found. The XIVE implementation drops the current priority
> to 0xff which effectively unmasks interrupts in a current CPU.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> Reviewed-by: Cédric Le Goater <clg@kaod.org>

Of course I missed this:


Fixes: 243e25112d06 ("powerpc/xive: Native exploitation of the XIVE 
interrupt controller")




> ---
> Changes:
> v3:
> * added a comment above xive_orphan_irq()
> 
> v2:
> * added ppc_md.orphan_irq
> ---
>   arch/powerpc/include/asm/machdep.h |  3 +++
>   arch/powerpc/kernel/irq.c          |  9 ++++++---
>   arch/powerpc/sysdev/xive/common.c  | 18 ++++++++++++++++++
>   3 files changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
> index c43d6eca9edd..6cc14e28e89a 100644
> --- a/arch/powerpc/include/asm/machdep.h
> +++ b/arch/powerpc/include/asm/machdep.h
> @@ -59,6 +59,9 @@ struct machdep_calls {
>   	/* Return an irq, or 0 to indicate there are none pending. */
>   	unsigned int	(*get_irq)(void);
>   
> +	/* Drops irq if it does not have a valid descriptor */
> +	void		(*orphan_irq)(unsigned int irq);
> +
>   	/* PCI stuff */
>   	/* Called after allocating resources */
>   	void		(*pcibios_fixup)(void);
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index bc68c53af67c..b4e06d05bdba 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -632,10 +632,13 @@ void __do_irq(struct pt_regs *regs)
>   	may_hard_irq_enable();
>   
>   	/* And finally process it */
> -	if (unlikely(!irq))
> +	if (unlikely(!irq)) {
>   		__this_cpu_inc(irq_stat.spurious_irqs);
> -	else
> -		generic_handle_irq(irq);
> +	} else if (generic_handle_irq(irq)) {
> +		if (ppc_md.orphan_irq)
> +			ppc_md.orphan_irq(irq);
> +		__this_cpu_inc(irq_stat.spurious_irqs);
> +	}
>   
>   	trace_irq_exit(regs);
>   
> diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
> index 082c7e1c20f0..17e696b2d71b 100644
> --- a/arch/powerpc/sysdev/xive/common.c
> +++ b/arch/powerpc/sysdev/xive/common.c
> @@ -283,6 +283,23 @@ static unsigned int xive_get_irq(void)
>   	return irq;
>   }
>   
> +/*
> + * Handles the case when a target CPU catches an interrupt which is being shut
> + * down on another CPU. generic_handle_irq() returns an error in such case
> + * and then the orphan_irq() handler restores the CPPR to reenable interrupts.
> + *
> + * Without orphan_irq() and valid irq_desc, there is no other way to restore
> + * the CPPR. This executes on a CPU which caught the interrupt.
> + */
> +static void xive_orphan_irq(unsigned int irq)
> +{
> +	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
> +
> +	xc->cppr = 0xff;
> +	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
> +	DBG_VERBOSE("orphan_irq: irq %d, adjusting CPPR to 0xff\n", irq);
> +}
> +
>   /*
>    * After EOI'ing an interrupt, we need to re-check the queue
>    * to see if another interrupt is pending since multiple
> @@ -1419,6 +1436,7 @@ bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 o
>   	xive_irq_priority = max_prio;
>   
>   	ppc_md.get_irq = xive_get_irq;
> +	ppc_md.orphan_irq = xive_orphan_irq;
>   	__xive_enabled = true;
>   
>   	pr_devel("Initializing host..\n");
>
Cédric Le Goater July 27, 2019, 8:20 a.m. UTC | #3
Hello Alexey,

>> --- a/arch/powerpc/kernel/irq.c
>> +++ b/arch/powerpc/kernel/irq.c
>> @@ -632,10 +632,13 @@ void __do_irq(struct pt_regs *regs)
>>       may_hard_irq_enable();
>>         /* And finally process it */
>> -    if (unlikely(!irq))
>> +    if (unlikely(!irq)) {
>>           __this_cpu_inc(irq_stat.spurious_irqs);
>> -    else
>> -        generic_handle_irq(irq);
>> +    } else if (generic_handle_irq(irq)) {
>> +        if (ppc_md.orphan_irq)
>> +            ppc_md.orphan_irq(irq);
>> +        __this_cpu_inc(irq_stat.spurious_irqs);
>> +    }

I think we still have an issue here. 

The fix is relying on the fact that generic_handle_irq() will return 
EINVAL if irq desc is NULL, and if this is the case the top interrupt 
handler will consider we have an orphan interrupt.

But, we could also be in the middle of irq_domain->map() and have a 
partially initialized descriptor if the same interrupt is being remapped 
on a CPU while another is trying to handle orphans. 

Calling can_request_irq() (which checks the IRQ_NOREQUEST flag) should 
close that window and it would be clearer than relying on the return value 
of generic_handle_irq().  

C.

Patch
diff mbox series

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index c43d6eca9edd..6cc14e28e89a 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -59,6 +59,9 @@  struct machdep_calls {
 	/* Return an irq, or 0 to indicate there are none pending. */
 	unsigned int	(*get_irq)(void);
 
+	/* Drops irq if it does not have a valid descriptor */
+	void		(*orphan_irq)(unsigned int irq);
+
 	/* PCI stuff */
 	/* Called after allocating resources */
 	void		(*pcibios_fixup)(void);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bc68c53af67c..b4e06d05bdba 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -632,10 +632,13 @@  void __do_irq(struct pt_regs *regs)
 	may_hard_irq_enable();
 
 	/* And finally process it */
-	if (unlikely(!irq))
+	if (unlikely(!irq)) {
 		__this_cpu_inc(irq_stat.spurious_irqs);
-	else
-		generic_handle_irq(irq);
+	} else if (generic_handle_irq(irq)) {
+		if (ppc_md.orphan_irq)
+			ppc_md.orphan_irq(irq);
+		__this_cpu_inc(irq_stat.spurious_irqs);
+	}
 
 	trace_irq_exit(regs);
 
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 082c7e1c20f0..17e696b2d71b 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -283,6 +283,23 @@  static unsigned int xive_get_irq(void)
 	return irq;
 }
 
+/*
+ * Handles the case when a target CPU catches an interrupt which is being shut
+ * down on another CPU. generic_handle_irq() returns an error in such case
+ * and then the orphan_irq() handler restores the CPPR to reenable interrupts.
+ *
+ * Without orphan_irq() and valid irq_desc, there is no other way to restore
+ * the CPPR. This executes on a CPU which caught the interrupt.
+ */
+static void xive_orphan_irq(unsigned int irq)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+
+	xc->cppr = 0xff;
+	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
+	DBG_VERBOSE("orphan_irq: irq %d, adjusting CPPR to 0xff\n", irq);
+}
+
 /*
  * After EOI'ing an interrupt, we need to re-check the queue
  * to see if another interrupt is pending since multiple
@@ -1419,6 +1436,7 @@  bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 o
 	xive_irq_priority = max_prio;
 
 	ppc_md.get_irq = xive_get_irq;
+	ppc_md.orphan_irq = xive_orphan_irq;
 	__xive_enabled = true;
 
 	pr_devel("Initializing host..\n");