[v2,4/5] ARC: MCIP: Set an initial affinity value in idu_irq_map

Message ID 1477313194-2310-4-git-send-email-yuriy.kolerov@synopsys.com
State New

Commit Message

Yuriy Kolerov Oct. 24, 2016, 12:46 p.m. UTC
Originally, the initial distribution mode (whose value comes from the Device
Tree) for each common interrupt is set in idu_irq_xlate. This leads to the
following problems: idu_irq_xlate may be called several times while the
Device Tree is parsed, and it is semantically wrong to configure the initial
distribution mode there. Also, the affinity value (a bitmap of CPUs) is not
saved to the irq_desc structure of the virq, so later (after the Device Tree
has been parsed) the kernel sees that no affinity is set and applies the
default one (all cores, in round-robin mode). As a result, the affinity
value from the Device Tree is ignored.

To fix this, I added a buffer that holds the initial CPU bitmaps read from
the Device Tree. idu_irq_xlate saves those bitmaps into the buffer, and the
affinity of the virq is then set explicitly in idu_irq_map. This works
because idu_irq_xlate is always called before idu_irq_map.

Even though this works, I think it should be rewritten to eliminate the
buffer and move all of the logic into idu_irq_map, but I do not know how
to do that correctly.

Signed-off-by: Yuriy Kolerov <yuriy.kolerov@synopsys.com>
---
 arch/arc/kernel/mcip.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

Comments

Vineet Gupta Oct. 25, 2016, 6:28 p.m. UTC | #1
On 10/24/2016 05:46 AM, Yuriy Kolerov wrote:
> Originally an initial distribution mode (its value resides in Device Tree)
> for each common interrupt is set in idu_irq_xlate. This leads to the
> following problems. idu_irq_xlate may be called several times during parsing
> of the Device Tree and it is semantically wrong to configure an initial
> distribution mode here. Also a value of affinity (CPUs bitmap) is not saved
> to irq_desc structure for the virq - later (after parsing of the DT) kernel
> sees that affinity is not set and sets a default value of affinity (all
> cores in round robin mode). As a result a value of affinity from Device
> Tree is ignored.
> 
> To fix it I created a buffer for initial CPUs bitmaps from Device Tree.
> In idu_irq_xlate those bitmaps are saved to the buffer. Then affinity
> for virq is set manually in idu_irq_map. It works because idu_irq_xlate
> is always called before idu_irq_map.
> 
> Despite the fact that it works I think that it must be rewritten to
> eliminate usage of the buffer and move all logic to idu_irq_map but
> I do not know how to do it correctly.
> 
> Signed-off-by: Yuriy Kolerov <yuriy.kolerov@synopsys.com>
> ---
>  arch/arc/kernel/mcip.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
> index 51a218c..090f0a1 100644
> --- a/arch/arc/kernel/mcip.c
> +++ b/arch/arc/kernel/mcip.c
> @@ -12,12 +12,15 @@
>  #include <linux/irq.h>
>  #include <linux/spinlock.h>
>  #include <linux/irqdomain.h>
> +#include <linux/interrupt.h>
> +#include <linux/cpumask.h>
>  #include <asm/irqflags-arcv2.h>
>  #include <asm/mcip.h>
>  #include <asm/setup.h>
>  
>  static char smp_cpuinfo_buf[128];
>  static int idu_detected;
> +static unsigned long idu_cirq_to_dest[CONFIG_ARC_NUMBER_OF_INTERRUPTS];
>  
>  static DEFINE_RAW_SPINLOCK(mcip_lock);
>  
> @@ -232,9 +235,15 @@ static void idu_cascade_isr(struct irq_desc *desc)
>  
>  static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq)
>  {
> +	cpumask_t mask;
> +
>  	irq_set_chip_and_handler(virq, &idu_irq_chip, handle_level_irq);
>  	irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
>  
> +	cpumask_clear(&mask);
> +	cpumask_bits(&mask)[0] |= idu_cirq_to_dest[hwirq];
> +	irq_set_affinity(virq, &mask);
> +
>  	return 0;
>  }
>  
> @@ -252,8 +261,7 @@ static int idu_irq_xlate(struct irq_domain *d, struct device_node *n,
>  	if (distri == 0) {
>  		/* 0 - Round Robin to all cpus, otherwise 1 bit per core */
>  		raw_spin_lock_irqsave(&mcip_lock, flags);
> -		idu_set_dest(hwirq, BIT(num_online_cpus()) - 1);
> -		idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
> +		idu_cirq_to_dest[hwirq] = BIT(num_online_cpus()) - 1;
>  		raw_spin_unlock_irqrestore(&mcip_lock, flags);
>  	} else {
>  		/*
> @@ -267,8 +275,7 @@ static int idu_irq_xlate(struct irq_domain *d, struct device_node *n,
>  				hwirq, cpu);
>  
>  		raw_spin_lock_irqsave(&mcip_lock, flags);
> -		idu_set_dest(hwirq, cpu);
> -		idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_DEST);
> +		idu_cirq_to_dest[hwirq] = cpu;
>  		raw_spin_unlock_irqrestore(&mcip_lock, flags);
>  	}
> 

So I missed this part - you are not touching the hardware here at all, so we
don't really need the spin lock. Nevertheless, as you said off list, this
patch is more of a hack and we really need to find a saner way of doing this!

@Marc, @tglx - any guidance here? The changelog at the top has the motivation
for this hack!

Thx,
-Vineet
Marc Zyngier Oct. 26, 2016, 2:05 p.m. UTC | #2
On 25/10/16 19:28, Vineet Gupta wrote:
> On 10/24/2016 05:46 AM, Yuriy Kolerov wrote:
>> Originally an initial distribution mode (its value resides in Device Tree)
>> for each common interrupt is set in idu_irq_xlate. This leads to the
>> following problems. idu_irq_xlate may be called several times during parsing
>> of the Device Tree and it is semantically wrong to configure an initial
>> distribution mode here. Also a value of affinity (CPUs bitmap) is not saved
>> to irq_desc structure for the virq - later (after parsing of the DT) kernel
>> sees that affinity is not set and sets a default value of affinity (all
>> cores in round robin mode). As a result a value of affinity from Device
>> Tree is ignored.
>>
>> To fix it I created a buffer for initial CPUs bitmaps from Device Tree.
>> In idu_irq_xlate those bitmaps are saved to the buffer. Then affinity
>> for virq is set manually in idu_irq_map. It works because idu_irq_xlate
>> is always called before idu_irq_map.
>>
>> Despite the fact that it works I think that it must be rewritten to
>> eliminate usage of the buffer and move all logic to idu_irq_map but
>> I do not know how to do it correctly.
>>
>> Signed-off-by: Yuriy Kolerov <yuriy.kolerov@synopsys.com>
>> ---
>>  arch/arc/kernel/mcip.c | 15 +++++++++++----
>>  1 file changed, 11 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
>> index 51a218c..090f0a1 100644
>> --- a/arch/arc/kernel/mcip.c
>> +++ b/arch/arc/kernel/mcip.c
>> @@ -12,12 +12,15 @@
>>  #include <linux/irq.h>
>>  #include <linux/spinlock.h>
>>  #include <linux/irqdomain.h>
>> +#include <linux/interrupt.h>
>> +#include <linux/cpumask.h>
>>  #include <asm/irqflags-arcv2.h>
>>  #include <asm/mcip.h>
>>  #include <asm/setup.h>
>>  
>>  static char smp_cpuinfo_buf[128];
>>  static int idu_detected;
>> +static unsigned long idu_cirq_to_dest[CONFIG_ARC_NUMBER_OF_INTERRUPTS];
>>  
>>  static DEFINE_RAW_SPINLOCK(mcip_lock);
>>  
>> @@ -232,9 +235,15 @@ static void idu_cascade_isr(struct irq_desc *desc)
>>  
>>  static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq)
>>  {
>> +	cpumask_t mask;
>> +
>>  	irq_set_chip_and_handler(virq, &idu_irq_chip, handle_level_irq);
>>  	irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
>>  
>> +	cpumask_clear(&mask);
>> +	cpumask_bits(&mask)[0] |= idu_cirq_to_dest[hwirq];
>> +	irq_set_affinity(virq, &mask);
>> +
>>  	return 0;
>>  }
>>  
>> @@ -252,8 +261,7 @@ static int idu_irq_xlate(struct irq_domain *d, struct device_node *n,
>>  	if (distri == 0) {
>>  		/* 0 - Round Robin to all cpus, otherwise 1 bit per core */
>>  		raw_spin_lock_irqsave(&mcip_lock, flags);
>> -		idu_set_dest(hwirq, BIT(num_online_cpus()) - 1);
>> -		idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
>> +		idu_cirq_to_dest[hwirq] = BIT(num_online_cpus()) - 1;
>>  		raw_spin_unlock_irqrestore(&mcip_lock, flags);
>>  	} else {
>>  		/*
>> @@ -267,8 +275,7 @@ static int idu_irq_xlate(struct irq_domain *d, struct device_node *n,
>>  				hwirq, cpu);
>>  
>>  		raw_spin_lock_irqsave(&mcip_lock, flags);
>> -		idu_set_dest(hwirq, cpu);
>> -		idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_DEST);
>> +		idu_cirq_to_dest[hwirq] = cpu;
>>  		raw_spin_unlock_irqrestore(&mcip_lock, flags);
>>  	}
>>
> 
> So I missed this part - you are not touching the hardware here at all - and so we
> don't really need the spin lock check. Nevertheless, as you said off list, this
> patch is more of a hack and we really need to find a saner way of doing this !
> 
> @Marc, @tglx any guidance here - changelog at the top has the motivation for this
> hack !

It definitely feels weird to encode the interrupt affinity in the DT
(the kernel and possibly userspace usually know much better than the
firmware). What is the actual reason for storing the affinity there?

Thanks,

	M.
Vineet Gupta Oct. 26, 2016, 4:17 p.m. UTC | #3
On 10/26/2016 07:05 AM, Marc Zyngier wrote:
> It definitely feels weird to encode the interrupt affinity in the DT
> (the kernel and possible userspace usually know much better than the
> firmware). What is the actual reason for storing the affinity there?

The IDU intc supports various interrupt distribution modes (Round Robin, send
to one cpu only, etc.), which in turn map to affinity settings. When doing the
DT binding, we decided to add this to the DT to get a "seed" value for the
affinity, which the user could optionally change after boot. This seemed like
a benign design choice at the time.
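
For illustration, here is a rough sketch of how a .irq_set_affinity callback
could translate a cpumask into the IDU's destination bits and distribution
mode. The helpers idu_set_dest/idu_set_mode, the IDU_M_* constants and
mcip_lock are taken from the diff above; the function name and the overall
shape of the callback are assumptions, not necessarily the driver's actual
code:

/*
 * Sketch only: map a requested cpumask onto the IDU hardware.
 * A mask with more than one online CPU is programmed as Round Robin,
 * a single-CPU mask as a fixed destination. Helper names and constants
 * come from the diff above; everything else is illustrative.
 */
static int idu_irq_set_affinity_sketch(struct irq_data *data,
				       const struct cpumask *cpumask,
				       bool force)
{
	unsigned long flags;
	unsigned int destination_bits, distribution_mode;
	cpumask_t online;

	/* Error out if the requested mask contains no online CPU */
	if (!cpumask_and(&online, cpumask, cpu_online_mask))
		return -EINVAL;

	destination_bits = cpumask_bits(&online)[0];

	/* More than one destination bit set -> Round Robin, else fixed dest */
	if (ffs(destination_bits) != fls(destination_bits))
		distribution_mode = IDU_M_DISTRI_RR;
	else
		distribution_mode = IDU_M_DISTRI_DEST;

	raw_spin_lock_irqsave(&mcip_lock, flags);
	idu_set_dest(data->hwirq, destination_bits);
	idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, distribution_mode);
	raw_spin_unlock_irqrestore(&mcip_lock, flags);

	return IRQ_SET_MASK_OK;
}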

Thx,
-Vineet
Marc Zyngier Oct. 26, 2016, 4:36 p.m. UTC | #4
On Wed, Oct 26 2016 at 05:17:34 PM, Vineet Gupta <Vineet.Gupta1@synopsys.com> wrote:
> On 10/26/2016 07:05 AM, Marc Zyngier wrote:
>> It definitely feels weird to encode the interrupt affinity in the DT
>> (the kernel and possible userspace usually know much better than the
>> firmware). What is the actual reason for storing the affinity there?
>
> The IDU intc supports various interrupt distribution modes (Round
> Robin, send to one cpu only etc) whcih in turn map to affinity
> setting. When doing the DT binding, we decided to add that this to DT
> to get the "seed" value for affinity - which user could optionally
> changed after boot. This seemed like a benign design choice at the
> time.

Right. But is this initial setting something that the kernel has to
absolutely honor? The usual behavior is to let the kernel pick something
sensible, and let the user mess with it afterwards.

Is there any part of the kernel that would otherwise depend on this
affinity being set to a particular mode? If the answer is "none", then I
believe we can safely ignore that part of the binding (and maybe
deprecate it in the documentation).
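
Concretely, if the DT seed is ignored, the map hook could shrink back to
something like the sketch below (an illustrative sketch only, not the
eventual upstream change): the names match the diff above, the initial
affinity is left to the kernel's default, and later writes to
/proc/irq/*/smp_affinity go through the driver's .irq_set_affinity callback.

/*
 * Sketch: without a DT-provided affinity seed, the map hook only wires
 * up the chip and flow handler; the core picks the default affinity and
 * any later change is programmed into the IDU by .irq_set_affinity.
 */
static int idu_irq_map(struct irq_domain *d, unsigned int virq,
		       irq_hw_number_t hwirq)
{
	irq_set_chip_and_handler(virq, &idu_irq_chip, handle_level_irq);
	irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);

	return 0;
}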

Thanks,

	M.
Vineet Gupta Oct. 26, 2016, 5:21 p.m. UTC | #5
On 10/26/2016 09:36 AM, Marc Zyngier wrote:
> On Wed, Oct 26 2016 at 05:17:34 PM, Vineet Gupta <Vineet.Gupta1@synopsys.com> wrote:
>> On 10/26/2016 07:05 AM, Marc Zyngier wrote:
>>> It definitely feels weird to encode the interrupt affinity in the DT
>>> (the kernel and possible userspace usually know much better than the
>>> firmware). What is the actual reason for storing the affinity there?
>>
>> The IDU intc supports various interrupt distribution modes (Round
>> Robin, send to one cpu only etc) whcih in turn map to affinity
>> setting. When doing the DT binding, we decided to add that this to DT
>> to get the "seed" value for affinity - which user could optionally
>> changed after boot. This seemed like a benign design choice at the
>> time.
> 
> Right. But is this initial setting something that the kernel has to
> absolutely honor? 

Not necessarily.

> The usual behavior is to let kernel pick something
> sensible, and let the user mess with it afterwards.

Right !


> Is there any part of the kernel that would otherwise depend on this
> affinity being set to a particular mode? If the answer is "none", then I
> believe we can safely ignore that part of the binding (and maybe
> deprecate it in the documentation).

Not really. It was relevant for the initial bring-up of the IDU software and
hardware, e.g. checking that a UART behind the IDU works fine in both modes
for the very first user-mode prints, which happens before the init scripts
get a chance to change the affinity. But that bridge has long been crossed.

So I agree that we will ignore the affinity settings from the DT and
deprecate the binding.

Thx for steering us in the right direction. Much appreciated.

-Vineet

Patch

diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index 51a218c..090f0a1 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -12,12 +12,15 @@ 
 #include <linux/irq.h>
 #include <linux/spinlock.h>
 #include <linux/irqdomain.h>
+#include <linux/interrupt.h>
+#include <linux/cpumask.h>
 #include <asm/irqflags-arcv2.h>
 #include <asm/mcip.h>
 #include <asm/setup.h>
 
 static char smp_cpuinfo_buf[128];
 static int idu_detected;
+static unsigned long idu_cirq_to_dest[CONFIG_ARC_NUMBER_OF_INTERRUPTS];
 
 static DEFINE_RAW_SPINLOCK(mcip_lock);
 
@@ -232,9 +235,15 @@  static void idu_cascade_isr(struct irq_desc *desc)
 
 static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq)
 {
+	cpumask_t mask;
+
 	irq_set_chip_and_handler(virq, &idu_irq_chip, handle_level_irq);
 	irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
 
+	cpumask_clear(&mask);
+	cpumask_bits(&mask)[0] |= idu_cirq_to_dest[hwirq];
+	irq_set_affinity(virq, &mask);
+
 	return 0;
 }
 
@@ -252,8 +261,7 @@  static int idu_irq_xlate(struct irq_domain *d, struct device_node *n,
 	if (distri == 0) {
 		/* 0 - Round Robin to all cpus, otherwise 1 bit per core */
 		raw_spin_lock_irqsave(&mcip_lock, flags);
-		idu_set_dest(hwirq, BIT(num_online_cpus()) - 1);
-		idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
+		idu_cirq_to_dest[hwirq] = BIT(num_online_cpus()) - 1;
 		raw_spin_unlock_irqrestore(&mcip_lock, flags);
 	} else {
 		/*
@@ -267,8 +275,7 @@  static int idu_irq_xlate(struct irq_domain *d, struct device_node *n,
 				hwirq, cpu);
 
 		raw_spin_lock_irqsave(&mcip_lock, flags);
-		idu_set_dest(hwirq, cpu);
-		idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_DEST);
+		idu_cirq_to_dest[hwirq] = cpu;
 		raw_spin_unlock_irqrestore(&mcip_lock, flags);
 	}