Patchwork [5/5] pseries: Move XICS initialization before cpu initialization

login
register
mail settings
Submitter David Gibson
Date March 14, 2013, 1:53 a.m.
Message ID <1363226008-26639-6-git-send-email-david@gibson.dropbear.id.au>
Download mbox | patch
Permalink /patch/227425/
State New
Headers show

Comments

David Gibson - March 14, 2013, 1:53 a.m.
Currently, the pseries machine initializes the cpus, then the XICS
interrupt controller.  However, to support the upcoming in-kernel XICS
implementation we will need to initialize the irq controller before the
vcpus.  This patch makes the necesssary rearrangement.  This means the
xics init code can no longer auto-detect the number of cpus ("interrupt
servers" in XICS terminology) and so we must pass that in explicitly from
the platform code.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Ben Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr.c |   12 +++++++-----
 hw/ppc/xics.c  |   57 +++++++++++++++++++++++++-------------------------------
 hw/xics.h      |    3 ++-
 3 files changed, 34 insertions(+), 38 deletions(-)
Alexander Graf - March 15, 2013, 12:33 p.m.
On 14.03.2013, at 02:53, David Gibson wrote:

> Currently, the pseries machine initializes the cpus, then the XICS
> interrupt controller.  However, to support the upcoming in-kernel XICS
> implementation we will need to initialize the irq controller before the
> vcpus.  This patch makes the necesssary rearrangement.  This means the

We're changing that notion in the in-kernel XICS discussions.  The flow will look like this:

  * create vcpus
  * create XICS
  * foreach (vcpu)
      * enable_cap(vcpu, CAP_XICS_SERVER, xics_handle)

However, that means we still need to know the maximum number of supported vcpus during the create phase. That number can be bigger than smp_cpus though, since you probably want to support hotplug add of CPUs later on.

Can't we just make the number of supported "interrupt servers" a constant?


Alex

> xics init code can no longer auto-detect the number of cpus ("interrupt
> servers" in XICS terminology) and so we must pass that in explicitly from
> the platform code.
> 
> Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
> Signed-off-by: Ben Herrenschmidt <benh@kernel.crashing.org>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> hw/ppc/spapr.c |   12 +++++++-----
> hw/ppc/xics.c  |   57 +++++++++++++++++++++++++-------------------------------
> hw/xics.h      |    3 ++-
> 3 files changed, 34 insertions(+), 38 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 7293082..b2c9b42 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -791,6 +791,11 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>         spapr->htab_shift++;
>     }
> 
> +    /* Set up Interrupt Controller before we create the VCPUs */
> +    spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
> +                                  XICS_IRQS);
> +    spapr->next_irq = XICS_IRQ_BASE;
> +
>     /* init CPUs */
>     if (cpu_model == NULL) {
>         cpu_model = kvm_enabled() ? "host" : "POWER7";
> @@ -803,6 +808,8 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>         }
>         env = &cpu->env;
> 
> +        xics_cpu_setup(spapr->icp, cpu);
> +
>         /* Set time-base frequency to 512 MHz */
>         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
> 
> @@ -842,11 +849,6 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>     }
>     g_free(filename);
> 
> -
> -    /* Set up Interrupt Controller */
> -    spapr->icp = xics_system_init(XICS_IRQS);
> -    spapr->next_irq = XICS_IRQ_BASE;
> -
>     /* Set up EPOW events infrastructure */
>     spapr_events_init(spapr);
> 
> diff --git a/hw/ppc/xics.c b/hw/ppc/xics.c
> index c3ef12f..374da5b 100644
> --- a/hw/ppc/xics.c
> +++ b/hw/ppc/xics.c
> @@ -521,45 +521,38 @@ static void xics_reset(void *opaque)
>     }
> }
> 
> -struct icp_state *xics_system_init(int nr_irqs)
> +void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
> {
> -    CPUPPCState *env;
> -    CPUState *cpu;
> -    int max_server_num;
> -    struct icp_state *icp;
> -    struct ics_state *ics;
> +    CPUState *cs = CPU(cpu);
> +    CPUPPCState *env = &cpu->env;
> +    struct icp_server_state *ss = &icp->ss[cs->cpu_index];
> 
> -    max_server_num = -1;
> -    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> -        cpu = CPU(ppc_env_get_cpu(env));
> -        if (cpu->cpu_index > max_server_num) {
> -            max_server_num = cpu->cpu_index;
> -        }
> -    }
> +    assert(cs->cpu_index < icp->nr_servers);
> 
> -    icp = g_malloc0(sizeof(*icp));
> -    icp->nr_servers = max_server_num + 1;
> -    icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
> +    switch (PPC_INPUT(env)) {
> +    case PPC_FLAGS_INPUT_POWER7:
> +        ss->output = env->irq_inputs[POWER7_INPUT_INT];
> +        break;
> 
> -    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> -        cpu = CPU(ppc_env_get_cpu(env));
> -        struct icp_server_state *ss = &icp->ss[cpu->cpu_index];
> +    case PPC_FLAGS_INPUT_970:
> +        ss->output = env->irq_inputs[PPC970_INPUT_INT];
> +        break;
> 
> -        switch (PPC_INPUT(env)) {
> -        case PPC_FLAGS_INPUT_POWER7:
> -            ss->output = env->irq_inputs[POWER7_INPUT_INT];
> -            break;
> +    default:
> +        fprintf(stderr, "XICS interrupt controller does not support this CPU "
> +                "bus model\n");
> +        abort();
> +    }
> +}
> 
> -        case PPC_FLAGS_INPUT_970:
> -            ss->output = env->irq_inputs[PPC970_INPUT_INT];
> -            break;
> +struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
> +{
> +    struct icp_state *icp;
> +    struct ics_state *ics;
> 
> -        default:
> -            hw_error("XICS interrupt model does not support this CPU bus "
> -                     "model\n");
> -            exit(1);
> -        }
> -    }
> +    icp = g_malloc0(sizeof(*icp));
> +    icp->nr_servers = nr_servers;
> +    icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
> 
>     ics = g_malloc0(sizeof(*ics));
>     ics->nr_irqs = nr_irqs;
> diff --git a/hw/xics.h b/hw/xics.h
> index c3bf008..6bce042 100644
> --- a/hw/xics.h
> +++ b/hw/xics.h
> @@ -35,6 +35,7 @@ struct icp_state;
> qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
> void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);
> 
> -struct icp_state *xics_system_init(int nr_irqs);
> +struct icp_state *xics_system_init(int nr_servers, int nr_irqs);
> +void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
> 
> #endif /* __XICS_H__ */
> -- 
> 1.7.10.4
>
Benjamin Herrenschmidt - March 16, 2013, 3:14 a.m.
On Fri, 2013-03-15 at 13:33 +0100, Alexander Graf wrote:
> On 14.03.2013, at 02:53, David Gibson wrote:
> 
> > Currently, the pseries machine initializes the cpus, then the XICS
> > interrupt controller.  However, to support the upcoming in-kernel XICS
> > implementation we will need to initialize the irq controller before the
> > vcpus.  This patch makes the necesssary rearrangement.  This means the
> 
> We're changing that notion in the in-kernel XICS discussions.  The flow will look like this:
> 
>   * create vcpus
>   * create XICS
>   * foreach (vcpu)
>       * enable_cap(vcpu, CAP_XICS_SERVER, xics_handle)

This is stupid. Why have the VCPU initialize itself for non-kernel
interrupts and *then* switch it over ?

You guys are tiring me of needing about 2 years to iron out a simple
API just to end up with the worst possible crap in the end.

Ben.

> However, that means we still need to know the maximum number of supported vcpus during the create phase. That number can be bigger than smp_cpus though, since you probably want to support hotplug add of CPUs later on.
> 
> Can't we just make the number of supported "interrupt servers" a constant?
> 
> 
> Alex
> 
> > xics init code can no longer auto-detect the number of cpus ("interrupt
> > servers" in XICS terminology) and so we must pass that in explicitly from
> > the platform code.
> > 
> > Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
> > Signed-off-by: Ben Herrenschmidt <benh@kernel.crashing.org>
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> > hw/ppc/spapr.c |   12 +++++++-----
> > hw/ppc/xics.c  |   57 +++++++++++++++++++++++++-------------------------------
> > hw/xics.h      |    3 ++-
> > 3 files changed, 34 insertions(+), 38 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 7293082..b2c9b42 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -791,6 +791,11 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
> >         spapr->htab_shift++;
> >     }
> > 
> > +    /* Set up Interrupt Controller before we create the VCPUs */
> > +    spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
> > +                                  XICS_IRQS);
> > +    spapr->next_irq = XICS_IRQ_BASE;
> > +
> >     /* init CPUs */
> >     if (cpu_model == NULL) {
> >         cpu_model = kvm_enabled() ? "host" : "POWER7";
> > @@ -803,6 +808,8 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
> >         }
> >         env = &cpu->env;
> > 
> > +        xics_cpu_setup(spapr->icp, cpu);
> > +
> >         /* Set time-base frequency to 512 MHz */
> >         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
> > 
> > @@ -842,11 +849,6 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
> >     }
> >     g_free(filename);
> > 
> > -
> > -    /* Set up Interrupt Controller */
> > -    spapr->icp = xics_system_init(XICS_IRQS);
> > -    spapr->next_irq = XICS_IRQ_BASE;
> > -
> >     /* Set up EPOW events infrastructure */
> >     spapr_events_init(spapr);
> > 
> > diff --git a/hw/ppc/xics.c b/hw/ppc/xics.c
> > index c3ef12f..374da5b 100644
> > --- a/hw/ppc/xics.c
> > +++ b/hw/ppc/xics.c
> > @@ -521,45 +521,38 @@ static void xics_reset(void *opaque)
> >     }
> > }
> > 
> > -struct icp_state *xics_system_init(int nr_irqs)
> > +void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
> > {
> > -    CPUPPCState *env;
> > -    CPUState *cpu;
> > -    int max_server_num;
> > -    struct icp_state *icp;
> > -    struct ics_state *ics;
> > +    CPUState *cs = CPU(cpu);
> > +    CPUPPCState *env = &cpu->env;
> > +    struct icp_server_state *ss = &icp->ss[cs->cpu_index];
> > 
> > -    max_server_num = -1;
> > -    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> > -        cpu = CPU(ppc_env_get_cpu(env));
> > -        if (cpu->cpu_index > max_server_num) {
> > -            max_server_num = cpu->cpu_index;
> > -        }
> > -    }
> > +    assert(cs->cpu_index < icp->nr_servers);
> > 
> > -    icp = g_malloc0(sizeof(*icp));
> > -    icp->nr_servers = max_server_num + 1;
> > -    icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
> > +    switch (PPC_INPUT(env)) {
> > +    case PPC_FLAGS_INPUT_POWER7:
> > +        ss->output = env->irq_inputs[POWER7_INPUT_INT];
> > +        break;
> > 
> > -    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> > -        cpu = CPU(ppc_env_get_cpu(env));
> > -        struct icp_server_state *ss = &icp->ss[cpu->cpu_index];
> > +    case PPC_FLAGS_INPUT_970:
> > +        ss->output = env->irq_inputs[PPC970_INPUT_INT];
> > +        break;
> > 
> > -        switch (PPC_INPUT(env)) {
> > -        case PPC_FLAGS_INPUT_POWER7:
> > -            ss->output = env->irq_inputs[POWER7_INPUT_INT];
> > -            break;
> > +    default:
> > +        fprintf(stderr, "XICS interrupt controller does not support this CPU "
> > +                "bus model\n");
> > +        abort();
> > +    }
> > +}
> > 
> > -        case PPC_FLAGS_INPUT_970:
> > -            ss->output = env->irq_inputs[PPC970_INPUT_INT];
> > -            break;
> > +struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
> > +{
> > +    struct icp_state *icp;
> > +    struct ics_state *ics;
> > 
> > -        default:
> > -            hw_error("XICS interrupt model does not support this CPU bus "
> > -                     "model\n");
> > -            exit(1);
> > -        }
> > -    }
> > +    icp = g_malloc0(sizeof(*icp));
> > +    icp->nr_servers = nr_servers;
> > +    icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
> > 
> >     ics = g_malloc0(sizeof(*ics));
> >     ics->nr_irqs = nr_irqs;
> > diff --git a/hw/xics.h b/hw/xics.h
> > index c3bf008..6bce042 100644
> > --- a/hw/xics.h
> > +++ b/hw/xics.h
> > @@ -35,6 +35,7 @@ struct icp_state;
> > qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
> > void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);
> > 
> > -struct icp_state *xics_system_init(int nr_irqs);
> > +struct icp_state *xics_system_init(int nr_servers, int nr_irqs);
> > +void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
> > 
> > #endif /* __XICS_H__ */
> > -- 
> > 1.7.10.4
> >
Alexander Graf - March 16, 2013, 5:33 a.m.
Am 16.03.2013 um 04:14 schrieb Benjamin Herrenschmidt <benh@kernel.crashing.org>:

> On Fri, 2013-03-15 at 13:33 +0100, Alexander Graf wrote:
>> On 14.03.2013, at 02:53, David Gibson wrote:
>> 
>>> Currently, the pseries machine initializes the cpus, then the XICS
>>> interrupt controller.  However, to support the upcoming in-kernel XICS
>>> implementation we will need to initialize the irq controller before the
>>> vcpus.  This patch makes the necesssary rearrangement.  This means the
>> 
>> We're changing that notion in the in-kernel XICS discussions.  The flow will look like this:
>> 
>>  * create vcpus
>>  * create XICS
>>  * foreach (vcpu)
>>      * enable_cap(vcpu, CAP_XICS_SERVER, xics_handle)
> 
> This is stupid. Why have the VCPU initialize itself for non-kernel
> interrupts and *then* switch it over ?

Because non-kernel initialization is a nop.

Alex

> You guys are tiring me of needing about 2 years to iron out a simple
> API just to end up with the worst possible crap in the end.
> 
> Ben.
> 
>> However, that means we still need to know the maximum number of supported vcpus during the create phase. That number can be bigger than smp_cpus though, since you probably want to support hotplug add of CPUs later on.
>> 
>> Can't we just make the number of supported "interrupt servers" a constant?
>> 
>> 
>> Alex
>> 
>>> xics init code can no longer auto-detect the number of cpus ("interrupt
>>> servers" in XICS terminology) and so we must pass that in explicitly from
>>> the platform code.
>>> 
>>> Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
>>> Signed-off-by: Ben Herrenschmidt <benh@kernel.crashing.org>
>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>>> ---
>>> hw/ppc/spapr.c |   12 +++++++-----
>>> hw/ppc/xics.c  |   57 +++++++++++++++++++++++++-------------------------------
>>> hw/xics.h      |    3 ++-
>>> 3 files changed, 34 insertions(+), 38 deletions(-)
>>> 
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index 7293082..b2c9b42 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -791,6 +791,11 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>>>        spapr->htab_shift++;
>>>    }
>>> 
>>> +    /* Set up Interrupt Controller before we create the VCPUs */
>>> +    spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
>>> +                                  XICS_IRQS);
>>> +    spapr->next_irq = XICS_IRQ_BASE;
>>> +
>>>    /* init CPUs */
>>>    if (cpu_model == NULL) {
>>>        cpu_model = kvm_enabled() ? "host" : "POWER7";
>>> @@ -803,6 +808,8 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>>>        }
>>>        env = &cpu->env;
>>> 
>>> +        xics_cpu_setup(spapr->icp, cpu);
>>> +
>>>        /* Set time-base frequency to 512 MHz */
>>>        cpu_ppc_tb_init(env, TIMEBASE_FREQ);
>>> 
>>> @@ -842,11 +849,6 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>>>    }
>>>    g_free(filename);
>>> 
>>> -
>>> -    /* Set up Interrupt Controller */
>>> -    spapr->icp = xics_system_init(XICS_IRQS);
>>> -    spapr->next_irq = XICS_IRQ_BASE;
>>> -
>>>    /* Set up EPOW events infrastructure */
>>>    spapr_events_init(spapr);
>>> 
>>> diff --git a/hw/ppc/xics.c b/hw/ppc/xics.c
>>> index c3ef12f..374da5b 100644
>>> --- a/hw/ppc/xics.c
>>> +++ b/hw/ppc/xics.c
>>> @@ -521,45 +521,38 @@ static void xics_reset(void *opaque)
>>>    }
>>> }
>>> 
>>> -struct icp_state *xics_system_init(int nr_irqs)
>>> +void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>>> {
>>> -    CPUPPCState *env;
>>> -    CPUState *cpu;
>>> -    int max_server_num;
>>> -    struct icp_state *icp;
>>> -    struct ics_state *ics;
>>> +    CPUState *cs = CPU(cpu);
>>> +    CPUPPCState *env = &cpu->env;
>>> +    struct icp_server_state *ss = &icp->ss[cs->cpu_index];
>>> 
>>> -    max_server_num = -1;
>>> -    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>>> -        cpu = CPU(ppc_env_get_cpu(env));
>>> -        if (cpu->cpu_index > max_server_num) {
>>> -            max_server_num = cpu->cpu_index;
>>> -        }
>>> -    }
>>> +    assert(cs->cpu_index < icp->nr_servers);
>>> 
>>> -    icp = g_malloc0(sizeof(*icp));
>>> -    icp->nr_servers = max_server_num + 1;
>>> -    icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
>>> +    switch (PPC_INPUT(env)) {
>>> +    case PPC_FLAGS_INPUT_POWER7:
>>> +        ss->output = env->irq_inputs[POWER7_INPUT_INT];
>>> +        break;
>>> 
>>> -    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>>> -        cpu = CPU(ppc_env_get_cpu(env));
>>> -        struct icp_server_state *ss = &icp->ss[cpu->cpu_index];
>>> +    case PPC_FLAGS_INPUT_970:
>>> +        ss->output = env->irq_inputs[PPC970_INPUT_INT];
>>> +        break;
>>> 
>>> -        switch (PPC_INPUT(env)) {
>>> -        case PPC_FLAGS_INPUT_POWER7:
>>> -            ss->output = env->irq_inputs[POWER7_INPUT_INT];
>>> -            break;
>>> +    default:
>>> +        fprintf(stderr, "XICS interrupt controller does not support this CPU "
>>> +                "bus model\n");
>>> +        abort();
>>> +    }
>>> +}
>>> 
>>> -        case PPC_FLAGS_INPUT_970:
>>> -            ss->output = env->irq_inputs[PPC970_INPUT_INT];
>>> -            break;
>>> +struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
>>> +{
>>> +    struct icp_state *icp;
>>> +    struct ics_state *ics;
>>> 
>>> -        default:
>>> -            hw_error("XICS interrupt model does not support this CPU bus "
>>> -                     "model\n");
>>> -            exit(1);
>>> -        }
>>> -    }
>>> +    icp = g_malloc0(sizeof(*icp));
>>> +    icp->nr_servers = nr_servers;
>>> +    icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
>>> 
>>>    ics = g_malloc0(sizeof(*ics));
>>>    ics->nr_irqs = nr_irqs;
>>> diff --git a/hw/xics.h b/hw/xics.h
>>> index c3bf008..6bce042 100644
>>> --- a/hw/xics.h
>>> +++ b/hw/xics.h
>>> @@ -35,6 +35,7 @@ struct icp_state;
>>> qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
>>> void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);
>>> 
>>> -struct icp_state *xics_system_init(int nr_irqs);
>>> +struct icp_state *xics_system_init(int nr_servers, int nr_irqs);
>>> +void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
>>> 
>>> #endif /* __XICS_H__ */
>>> -- 
>>> 1.7.10.4
> 
>
Benjamin Herrenschmidt - March 16, 2013, 5:41 a.m.
On Sat, 2013-03-16 at 06:33 +0100, Alexander Graf wrote:
> >> We're changing that notion in the in-kernel XICS discussions.  The
> flow will look like this:
> >> 
> >>  * create vcpus
> >>  * create XICS
> >>  * foreach (vcpu)
> >>      * enable_cap(vcpu, CAP_XICS_SERVER, xics_handle)
> > 
> > This is stupid. Why have the VCPU initialize itself for non-kernel
> > interrupts and *then* switch it over ?
> 
> Because non-kernel initialization is a nop.

And ?

Ben.
Alexander Graf - March 16, 2013, 6:07 a.m.
Am 16.03.2013 um 06:41 schrieb Benjamin Herrenschmidt <benh@kernel.crashing.org>:

> On Sat, 2013-03-16 at 06:33 +0100, Alexander Graf wrote:
>>>> We're changing that notion in the in-kernel XICS discussions.  The
>> flow will look like this:
>>>> 
>>>> * create vcpus
>>>> * create XICS
>>>> * foreach (vcpu)
>>>>     * enable_cap(vcpu, CAP_XICS_SERVER, xics_handle)
>>> 
>>> This is stupid. Why have the VCPU initialize itself for non-kernel
>>> interrupts and *then* switch it over ?
>> 
>> Because non-kernel initialization is a nop.
> 
> And ?

Compared to the previous aproach of a vm wide irq type, we win:

  - less vm global state
  - possibility of defining the 'cpu nr' from the pic's point of view
  - the same mechanism and code paths for hotplug and non-hotplug
  - possibility of supporting multiple pics
  - less implicit assumptions

While the code changes required to make this work should be minimal over the "set irq architecture" approach. It really makes life easier for everyone.


Alex
David Gibson - March 18, 2013, 2:55 a.m.
On Fri, Mar 15, 2013 at 01:33:18PM +0100, Alexander Graf wrote:
> 
> On 14.03.2013, at 02:53, David Gibson wrote:
> 
> > Currently, the pseries machine initializes the cpus, then the XICS
> > interrupt controller.  However, to support the upcoming in-kernel XICS
> > implementation we will need to initialize the irq controller before the
> > vcpus.  This patch makes the necesssary rearrangement.  This means the
> 
> We're changing that notion in the in-kernel XICS discussions.  The flow will look like this:
> 
>   * create vcpus
>   * create XICS
>   * foreach (vcpu)
>       * enable_cap(vcpu, CAP_XICS_SERVER, xics_handle)
> 
> However, that means we still need to know the maximum number of
> supported vcpus during the create phase. That number can be bigger
> than smp_cpus though, since you probably want to support hotplug add
> of CPUs later on.
> 
> Can't we just make the number of supported "interrupt servers" a
> constant?

I suppose, but we need an allocation for each one, so its a bit ugly.
In any case although the comment is a bit out of date, this patch also
creates a logical place to put per-cpu XICS initialization which we
will still need for the new interface.
Alexander Graf - March 18, 2013, 3:12 a.m.
On 18.03.2013, at 03:55, David Gibson wrote:

> On Fri, Mar 15, 2013 at 01:33:18PM +0100, Alexander Graf wrote:
>> 
>> On 14.03.2013, at 02:53, David Gibson wrote:
>> 
>>> Currently, the pseries machine initializes the cpus, then the XICS
>>> interrupt controller.  However, to support the upcoming in-kernel XICS
>>> implementation we will need to initialize the irq controller before the
>>> vcpus.  This patch makes the necesssary rearrangement.  This means the
>> 
>> We're changing that notion in the in-kernel XICS discussions.  The flow will look like this:
>> 
>>  * create vcpus
>>  * create XICS
>>  * foreach (vcpu)
>>      * enable_cap(vcpu, CAP_XICS_SERVER, xics_handle)
>> 
>> However, that means we still need to know the maximum number of
>> supported vcpus during the create phase. That number can be bigger
>> than smp_cpus though, since you probably want to support hotplug add
>> of CPUs later on.
>> 
>> Can't we just make the number of supported "interrupt servers" a
>> constant?
> 
> I suppose, but we need an allocation for each one, so its a bit ugly.
> In any case although the comment is a bit out of date, this patch also
> creates a logical place to put per-cpu XICS initialization which we
> will still need for the new interface.

So how would you model CPU hotplug add?


Alex
David Gibson - March 18, 2013, 3:38 a.m.
On Mon, Mar 18, 2013 at 04:12:11AM +0100, Alexander Graf wrote:
> 
> On 18.03.2013, at 03:55, David Gibson wrote:
> 
> > On Fri, Mar 15, 2013 at 01:33:18PM +0100, Alexander Graf wrote:
> >> 
> >> On 14.03.2013, at 02:53, David Gibson wrote:
> >> 
> >>> Currently, the pseries machine initializes the cpus, then the XICS
> >>> interrupt controller.  However, to support the upcoming in-kernel XICS
> >>> implementation we will need to initialize the irq controller before the
> >>> vcpus.  This patch makes the necesssary rearrangement.  This means the
> >> 
> >> We're changing that notion in the in-kernel XICS discussions.  The flow will look like this:
> >> 
> >>  * create vcpus
> >>  * create XICS
> >>  * foreach (vcpu)
> >>      * enable_cap(vcpu, CAP_XICS_SERVER, xics_handle)
> >> 
> >> However, that means we still need to know the maximum number of
> >> supported vcpus during the create phase. That number can be bigger
> >> than smp_cpus though, since you probably want to support hotplug add
> >> of CPUs later on.
> >> 
> >> Can't we just make the number of supported "interrupt servers" a
> >> constant?
> > 
> > I suppose, but we need an allocation for each one, so its a bit ugly.
> > In any case although the comment is a bit out of date, this patch also
> > creates a logical place to put per-cpu XICS initialization which we
> > will still need for the new interface.
> 
> So how would you model CPU hotplug add?

Add headroom to the XICS setup based on whatever information we have
about maximum pluggable CPUs.  To use the PAPR hotplug interfaces we
already need a notion of max # CPUs, we can't have that just be open
ended.  We'd also add a call to xics_cpu_setup() to the hotplug add
path, obviously.
Alexander Graf - March 21, 2013, 10:01 a.m.
On 18.03.2013, at 04:38, David Gibson wrote:

> On Mon, Mar 18, 2013 at 04:12:11AM +0100, Alexander Graf wrote:
>> 
>> On 18.03.2013, at 03:55, David Gibson wrote:
>> 
>>> On Fri, Mar 15, 2013 at 01:33:18PM +0100, Alexander Graf wrote:
>>>> 
>>>> On 14.03.2013, at 02:53, David Gibson wrote:
>>>> 
>>>>> Currently, the pseries machine initializes the cpus, then the XICS
>>>>> interrupt controller.  However, to support the upcoming in-kernel XICS
>>>>> implementation we will need to initialize the irq controller before the
>>>>> vcpus.  This patch makes the necesssary rearrangement.  This means the
>>>> 
>>>> We're changing that notion in the in-kernel XICS discussions.  The flow will look like this:
>>>> 
>>>> * create vcpus
>>>> * create XICS
>>>> * foreach (vcpu)
>>>>     * enable_cap(vcpu, CAP_XICS_SERVER, xics_handle)
>>>> 
>>>> However, that means we still need to know the maximum number of
>>>> supported vcpus during the create phase. That number can be bigger
>>>> than smp_cpus though, since you probably want to support hotplug add
>>>> of CPUs later on.
>>>> 
>>>> Can't we just make the number of supported "interrupt servers" a
>>>> constant?
>>> 
>>> I suppose, but we need an allocation for each one, so its a bit ugly.
>>> In any case although the comment is a bit out of date, this patch also
>>> creates a logical place to put per-cpu XICS initialization which we
>>> will still need for the new interface.
>> 
>> So how would you model CPU hotplug add?
> 
> Add headroom to the XICS setup based on whatever information we have
> about maximum pluggable CPUs.  To use the PAPR hotplug interfaces we
> already need a notion of max # CPUs, we can't have that just be open
> ended.  We'd also add a call to xics_cpu_setup() to the hotplug add
> path, obviously.

Yeah. So for that you also need the different code order. Sorry for the confusion :).

Thanks, applied to ppc-next.


Alex

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 7293082..b2c9b42 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -791,6 +791,11 @@  static void ppc_spapr_init(QEMUMachineInitArgs *args)
         spapr->htab_shift++;
     }
 
+    /* Set up Interrupt Controller before we create the VCPUs */
+    spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
+                                  XICS_IRQS);
+    spapr->next_irq = XICS_IRQ_BASE;
+
     /* init CPUs */
     if (cpu_model == NULL) {
         cpu_model = kvm_enabled() ? "host" : "POWER7";
@@ -803,6 +808,8 @@  static void ppc_spapr_init(QEMUMachineInitArgs *args)
         }
         env = &cpu->env;
 
+        xics_cpu_setup(spapr->icp, cpu);
+
         /* Set time-base frequency to 512 MHz */
         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
 
@@ -842,11 +849,6 @@  static void ppc_spapr_init(QEMUMachineInitArgs *args)
     }
     g_free(filename);
 
-
-    /* Set up Interrupt Controller */
-    spapr->icp = xics_system_init(XICS_IRQS);
-    spapr->next_irq = XICS_IRQ_BASE;
-
     /* Set up EPOW events infrastructure */
     spapr_events_init(spapr);
 
diff --git a/hw/ppc/xics.c b/hw/ppc/xics.c
index c3ef12f..374da5b 100644
--- a/hw/ppc/xics.c
+++ b/hw/ppc/xics.c
@@ -521,45 +521,38 @@  static void xics_reset(void *opaque)
     }
 }
 
-struct icp_state *xics_system_init(int nr_irqs)
+void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
 {
-    CPUPPCState *env;
-    CPUState *cpu;
-    int max_server_num;
-    struct icp_state *icp;
-    struct ics_state *ics;
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+    struct icp_server_state *ss = &icp->ss[cs->cpu_index];
 
-    max_server_num = -1;
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        cpu = CPU(ppc_env_get_cpu(env));
-        if (cpu->cpu_index > max_server_num) {
-            max_server_num = cpu->cpu_index;
-        }
-    }
+    assert(cs->cpu_index < icp->nr_servers);
 
-    icp = g_malloc0(sizeof(*icp));
-    icp->nr_servers = max_server_num + 1;
-    icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
+    switch (PPC_INPUT(env)) {
+    case PPC_FLAGS_INPUT_POWER7:
+        ss->output = env->irq_inputs[POWER7_INPUT_INT];
+        break;
 
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        cpu = CPU(ppc_env_get_cpu(env));
-        struct icp_server_state *ss = &icp->ss[cpu->cpu_index];
+    case PPC_FLAGS_INPUT_970:
+        ss->output = env->irq_inputs[PPC970_INPUT_INT];
+        break;
 
-        switch (PPC_INPUT(env)) {
-        case PPC_FLAGS_INPUT_POWER7:
-            ss->output = env->irq_inputs[POWER7_INPUT_INT];
-            break;
+    default:
+        fprintf(stderr, "XICS interrupt controller does not support this CPU "
+                "bus model\n");
+        abort();
+    }
+}
 
-        case PPC_FLAGS_INPUT_970:
-            ss->output = env->irq_inputs[PPC970_INPUT_INT];
-            break;
+struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
+{
+    struct icp_state *icp;
+    struct ics_state *ics;
 
-        default:
-            hw_error("XICS interrupt model does not support this CPU bus "
-                     "model\n");
-            exit(1);
-        }
-    }
+    icp = g_malloc0(sizeof(*icp));
+    icp->nr_servers = nr_servers;
+    icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
 
     ics = g_malloc0(sizeof(*ics));
     ics->nr_irqs = nr_irqs;
diff --git a/hw/xics.h b/hw/xics.h
index c3bf008..6bce042 100644
--- a/hw/xics.h
+++ b/hw/xics.h
@@ -35,6 +35,7 @@  struct icp_state;
 qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
 void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);
 
-struct icp_state *xics_system_init(int nr_irqs);
+struct icp_state *xics_system_init(int nr_servers, int nr_irqs);
+void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
 
 #endif /* __XICS_H__ */