diff mbox

[v4,15/20] ppc/xics: Add "native" XICS subclass

Message ID 1475479496-16158-16-git-send-email-clg@kaod.org
State New
Headers show

Commit Message

Cédric Le Goater Oct. 3, 2016, 7:24 a.m. UTC
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>

This provides access to the MMIO based Interrupt Presentation
Controllers (ICP) as found on a POWER8 system.

A new XICSNative class is introduced to hold the MMIO region of the
ICPs. It also makes use of a hash table to associate the ICPState of a
CPU with a HW processor id, as this is the server number presented in
the XIVEs.

The class routine 'find_icp' provide the way to do the lookups when
needed in the XICS base class.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[clg: - naming cleanups
      - replaced the use of xics_get_cpu_index_by_dt_id() by xics_find_icp()
      - added some qemu logging in case of error      
      - introduced a xics_native_find_icp routine to map icp index to
        cpu index      
      - moved sysbus mapping to chip ]
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---

 checkpatch complains on this one, but it seems to be a false positive :
 
 ERROR: spaces required around that '&' (ctx:WxV)
 #314: FILE: hw/intc/xics_native.c:246:
 +                        (gpointer) &xics->ss[cs->cpu_index]);

 default-configs/ppc64-softmmu.mak |   3 +-
 hw/intc/Makefile.objs             |   1 +
 hw/intc/xics_native.c             | 327 ++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/pnv.h              |  19 +++
 include/hw/ppc/xics.h             |  24 +++
 5 files changed, 373 insertions(+), 1 deletion(-)
 create mode 100644 hw/intc/xics_native.c

Comments

David Gibson Oct. 14, 2016, 6:10 a.m. UTC | #1
On Mon, Oct 03, 2016 at 09:24:51AM +0200, Cédric Le Goater wrote:
> From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> 
> This provides access to the MMIO based Interrupt Presentation
> Controllers (ICP) as found on a POWER8 system.
> 
> A new XICSNative class is introduced to hold the MMIO region of the
> ICPs. It also makes use of a hash table to associate the ICPState of a
> CPU with a HW processor id, as this is the server number presented in
> the XIVEs.
> 
> The class routine 'find_icp' provide the way to do the lookups when
> needed in the XICS base class.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> [clg: - naming cleanups
>       - replaced the use of xics_get_cpu_index_by_dt_id() by xics_find_icp()
>       - added some qemu logging in case of error      
>       - introduced a xics_native_find_icp routine to map icp index to
>         cpu index      
>       - moved sysbus mapping to chip ]
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
> 
>  checkpatch complains on this one, but it seems to be a false positive :
>  
>  ERROR: spaces required around that '&' (ctx:WxV)
>  #314: FILE: hw/intc/xics_native.c:246:
>  +                        (gpointer) &xics->ss[cs->cpu_index]);
> 
>  default-configs/ppc64-softmmu.mak |   3 +-
>  hw/intc/Makefile.objs             |   1 +
>  hw/intc/xics_native.c             | 327 ++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/pnv.h              |  19 +++
>  include/hw/ppc/xics.h             |  24 +++
>  5 files changed, 373 insertions(+), 1 deletion(-)
>  create mode 100644 hw/intc/xics_native.c
> 
> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
> index 67a9bcaa67fa..a22c93a48686 100644
> --- a/default-configs/ppc64-softmmu.mak
> +++ b/default-configs/ppc64-softmmu.mak
> @@ -48,8 +48,9 @@ CONFIG_PLATFORM_BUS=y
>  CONFIG_ETSEC=y
>  CONFIG_LIBDECNUMBER=y
>  # For pSeries
> -CONFIG_XICS=$(CONFIG_PSERIES)
> +CONFIG_XICS=$(or $(CONFIG_PSERIES),$(CONFIG_POWERNV))
>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
> +CONFIG_XICS_NATIVE=$(CONFIG_POWERNV)
>  CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
>  # For PReP
>  CONFIG_MC146818RTC=y
> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> index 05ec21b21e0e..7be5dfc8347b 100644
> --- a/hw/intc/Makefile.objs
> +++ b/hw/intc/Makefile.objs
> @@ -31,6 +31,7 @@ obj-$(CONFIG_RASPI) += bcm2835_ic.o bcm2836_control.o
>  obj-$(CONFIG_SH4) += sh_intc.o
>  obj-$(CONFIG_XICS) += xics.o
>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
> +obj-$(CONFIG_XICS_NATIVE) += xics_native.o
>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
> diff --git a/hw/intc/xics_native.c b/hw/intc/xics_native.c
> new file mode 100644
> index 000000000000..16413d807f65
> --- /dev/null
> +++ b/hw/intc/xics_native.c
> @@ -0,0 +1,327 @@
> +/*
> + * QEMU PowerPC PowerNV machine model
> + *
> + * Native version of ICS/ICP
> + *
> + * Copyright (c) 2016, IBM Corporation.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "qemu-common.h"
> +#include "cpu.h"
> +#include "hw/hw.h"
> +#include "qemu/log.h"
> +#include "qapi/error.h"
> +
> +#include "hw/ppc/fdt.h"
> +#include "hw/ppc/xics.h"
> +#include "hw/ppc/pnv.h"
> +
> +#include <libfdt.h>
> +
> +static void xics_native_reset(void *opaque)
> +{
> +    device_reset(DEVICE(opaque));
> +}
> +
> +static void xics_native_initfn(Object *obj)
> +{
> +    XICSState *xics = XICS_COMMON(obj);
> +
> +    QLIST_INIT(&xics->ics);

This shouldn't be necessary, because it's done in the base class
initfn (which is called before the subclass initfns).

> +    /*
> +     * Let's not forget to register a reset handler else the ICPs
> +     * won't be initialized with the correct values. Trouble ahead !
> +     */
> +    qemu_register_reset(xics_native_reset, xics);

And. this shouldn't be necessary.  If you set dc->reset to the right
thing in class_init (which the xics base class should already do) then
the device model will automatically reset the device, you shouldn't
need an extra reset handler.

> +}
> +
> +static uint64_t xics_native_read(void *opaque, hwaddr addr, unsigned width)
> +{
> +    XICSState *s = opaque;
> +    uint32_t cpu_id = (addr & (PNV_XICS_SIZE - 1)) >> 12;
> +    bool byte0 = (width == 1 && (addr & 0x3) == 0);
> +    uint64_t val = 0xffffffff;
> +    ICPState *ss;
> +
> +    ss = xics_find_icp(s, cpu_id);

IIUC, cpu_id is the hardware id here... so why aren't you using the
hash table you added to do this mapping?  (although see comments
elswhere, that I'm not sure that mapping belongs within xics).

Another option to avoid the lookup would be to register each icp page
as a separate MR, then you could set the opaque pointer directly to ss
instead of to the global xics.

> +    if (!ss) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP server %d\n", cpu_id);
> +        return val;
> +    }
> +
> +    switch (addr & 0xffc) {
> +    case 0: /* poll */
> +        val = icp_ipoll(ss, NULL);
> +        if (byte0) {
> +            val >>= 24;
> +        } else if (width != 4) {
> +            goto bad_access;
> +        }
> +        break;
> +    case 4: /* xirr */
> +        if (byte0) {
> +            val = icp_ipoll(ss, NULL) >> 24;
> +        } else if (width == 4) {
> +            val = icp_accept(ss);
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 12:
> +        if (byte0) {
> +            val = ss->mfrr;
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 16:
> +        if (width == 4) {
> +            val = ss->links[0];
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 20:
> +        if (width == 4) {
> +            val = ss->links[1];
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 24:
> +        if (width == 4) {
> +            val = ss->links[2];
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    default:
> +bad_access:
> +        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP access 0x%"
> +                      HWADDR_PRIx"/%d\n", addr, width);
> +    }
> +
> +    return val;
> +}
> +
> +static void xics_native_write(void *opaque, hwaddr addr, uint64_t val,
> +                        unsigned width)
> +{
> +    XICSState *s = opaque;
> +    uint32_t cpu_id = (addr & (PNV_XICS_SIZE - 1)) >> 12;
> +    bool byte0 = (width == 1 && (addr & 0x3) == 0);
> +    ICPState *ss;
> +
> +    ss = xics_find_icp(s, cpu_id);
> +    if (!ss) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP server %d\n", cpu_id);
> +        return;
> +    }
> +
> +    switch (addr & 0xffc) {
> +    case 4: /* xirr */
> +        if (byte0) {
> +            icp_set_cppr(s, cpu_id, val);
> +        } else if (width == 4) {
> +            icp_eoi(s, cpu_id, val);
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 12:
> +        if (byte0) {
> +            icp_set_mfrr(s, cpu_id, val);
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 16:
> +        if (width == 4) {
> +            ss->links[0] = val;
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 20:
> +        if (width == 4) {
> +            ss->links[1] = val;
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    case 24:
> +        if (width == 4) {
> +            ss->links[2] = val;
> +        } else {
> +            goto bad_access;
> +        }
> +        break;
> +    default:
> +bad_access:
> +        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP access 0x%"
> +                      HWADDR_PRIx"/%d\n", addr, width);
> +    }
> +}
> +
> +static const MemoryRegionOps xics_native_ops = {
> +    .read = xics_native_read,
> +    .write = xics_native_write,
> +    .valid.min_access_size = 1,
> +    .valid.max_access_size = 4,
> +    .impl.min_access_size = 1,
> +    .impl.max_access_size = 4,
> +    .endianness = DEVICE_BIG_ENDIAN,
> +};
> +
> +static void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers,
> +                                Error **errp)
> +{
> +    int i;
> +
> +    icp->nr_servers = nr_servers;
> +
> +    icp->ss = g_malloc0(icp->nr_servers * sizeof(ICPState));
> +    for (i = 0; i < icp->nr_servers; i++) {
> +        char name[32];
> +        object_initialize(&icp->ss[i], sizeof(icp->ss[i]), TYPE_ICP);
> +        snprintf(name, sizeof(name), "icp[%d]", i);
> +        object_property_add_child(OBJECT(icp), name, OBJECT(&icp->ss[i]),
> +                                  errp);

AFAICT this is identical to xics-spapr version and only difference to
xics-kvm is it uses TYPE_KVM_ICP.  We should look at fusing these -
maybe just having an icp typename in the xics class.

> +    }
> +}
> +
> +static void xics_native_realize(DeviceState *dev, Error **errp)
> +{
> +    XICSState *xics = XICS_COMMON(dev);
> +    XICSNative *xicsn = XICS_NATIVE(dev);
> +    Error *error = NULL;
> +    int i;
> +
> +    if (!xics->nr_servers) {
> +        error_setg(errp, "Number of servers needs to be greater than 0");
> +        return;
> +    }
> +
> +    for (i = 0; i < xics->nr_servers; i++) {
> +        object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
> +                                 &error);
> +        if (error) {
> +            error_propagate(errp, error);
> +            return;
> +        }
> +    }
> +
> +    xicsn->pir_table = g_hash_table_new(g_direct_hash, g_direct_equal);

I'm not sure having this map inside xics makes sense.  Wouldn't it be
more widely useful to have a fast PIR -> cpu_index map at the machine
level?  That could also use the structure of the hwids to do a fast
lookup neatly (collapse core id with a small table, recombine with
chip and thread id).  A hash table seems a big hammer to throw at it.

> +    /* Register MMIO regions */
> +    memory_region_init_io(&xicsn->icp_mmio, OBJECT(dev), &xics_native_ops,
> +                          xicsn, "xics", PNV_XICS_SIZE);
> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xicsn->icp_mmio);
> +}
> +
> +static void xics_native_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
> +{
> +    CPUState *cs = CPU(cpu);
> +    CPUPPCState *env = &cpu->env;
> +    XICSNative *xicsn = XICS_NATIVE(xics);
> +
> +    assert(cs->cpu_index < xics->nr_servers);
> +    g_hash_table_insert(xicsn->pir_table, GINT_TO_POINTER(env->spr[SPR_PIR]),
> +                        (gpointer) &xics->ss[cs->cpu_index]);
> +}
> +
> +static ICPState *xics_native_find_icp(XICSState *xics, int pir)
> +{
> +    XICSNative *xicsn = XICS_NATIVE(xics);
> +    gpointer key, value;
> +    gboolean found = g_hash_table_lookup_extended(xicsn->pir_table,
> +                                GINT_TO_POINTER(pir), &key, &value);
> +
> +    assert(found);
> +
> +    return (ICPState *) value;
> +}
> +
> +static void xics_native_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +    XICSStateClass *xsc = XICS_NATIVE_CLASS(oc);
> +
> +    dc->realize = xics_native_realize;
> +    xsc->set_nr_servers = xics_set_nr_servers;
> +    xsc->cpu_setup = xics_native_cpu_setup;
> +    xsc->find_icp = xics_native_find_icp;
> +}
> +
> +static const TypeInfo xics_native_info = {
> +    .name          = TYPE_XICS_NATIVE,
> +    .parent        = TYPE_XICS_COMMON,
> +    .instance_size = sizeof(XICSNative),
> +    .class_size = sizeof(XICSStateClass),
> +    .class_init    = xics_native_class_init,
> +    .instance_init = xics_native_initfn,
> +};
> +
> +static void xics_native_register_types(void)
> +{
> +    type_register_static(&xics_native_info);
> +}
> +
> +type_init(xics_native_register_types)
> +
> +void xics_native_populate_icp(PnvChip *chip, void *fdt, int offset,
> +                              uint32_t pir, uint32_t count)
> +{
> +    uint64_t addr;
> +    char *name;
> +    const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
> +    uint32_t irange[2], i, rsize;
> +    uint64_t *reg;
> +
> +    /*
> +     * TODO: add multichip ICP BAR
> +     */
> +    addr = PNV_XICS_BASE | (pir << 12);
> +
> +    irange[0] = cpu_to_be32(pir);
> +    irange[1] = cpu_to_be32(count);
> +
> +    rsize = sizeof(uint64_t) * 2 * count;
> +    reg = g_malloc(rsize);
> +    for (i = 0; i < count; i++) {
> +        reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000));
> +        reg[i * 2 + 1] = cpu_to_be64(0x1000);
> +    }
> +
> +    name = g_strdup_printf("interrupt-controller@%"PRIX64, addr);
> +    offset = fdt_add_subnode(fdt, offset, name);
> +    _FDT(offset);
> +    g_free(name);
> +
> +    _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat))));
> +    _FDT((fdt_setprop(fdt, offset, "reg", reg, rsize)));
> +    _FDT((fdt_setprop_string(fdt, offset, "device_type",
> +                              "PowerPC-External-Interrupt-Presentation")));
> +    _FDT((fdt_setprop(fdt, offset, "interrupt-controller", NULL, 0)));
> +    _FDT((fdt_setprop(fdt, offset, "ibm,interrupt-server-ranges",
> +                       irange, sizeof(irange))));
> +    _FDT((fdt_setprop_cell(fdt, offset, "#interrupt-cells", 1)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0)));
> +    g_free(reg);
> +}
> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
> index 617c3fdd4f06..3f24b87d199b 100644
> --- a/include/hw/ppc/pnv.h
> +++ b/include/hw/ppc/pnv.h
> @@ -125,4 +125,23 @@ typedef struct PnvMachineState {
>  #define PNV_XSCOM_BASE(chip)                                            \
>      (chip->xscom_base + ((uint64_t)(chip)->chip_id) * PNV_XSCOM_SIZE)
>  
> +/*
> + * XSCOM 0x20109CA defines the ICP BAR:
> + *
> + * 0:29   : bits 14 to 43 of address to define 1 MB region.
> + * 30     : 1 to enable ICP to receive loads/stores against its BAR region
> + * 31:63  : Constant 0
> + *
> + * Usually defined as :
> + *
> + *      0xffffe00200000000 -> 0x0003ffff80000000
> + *      0xffffe00600000000 -> 0x0003ffff80100000
> + *      0xffffe02200000000 -> 0x0003ffff80800000
> + *      0xffffe02600000000 -> 0x0003ffff80900000
> + *
> + * TODO: make a macro using the chip hw id ?
> + */
> +#define PNV_XICS_BASE         0x0003ffff80000000ull
> +#define PNV_XICS_SIZE         0x0000000000100000ull
> +
>  #endif /* _PPC_PNV_H */
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index dea9b92d4726..77ce786f000e 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -118,8 +118,27 @@ struct ICPState {
>      uint8_t mfrr;
>      qemu_irq output;
>      bool cap_irq_xics_enabled;
> +
> +    /*
> +     * for XICSNative (not used by Linux).
> +     */
> +    uint32_t links[3];
>  };
>  
> +#define TYPE_XICS_NATIVE "xics-native"
> +#define XICS_NATIVE(obj) OBJECT_CHECK(XICSNative, (obj), TYPE_XICS_NATIVE)
> +#define XICS_NATIVE_CLASS(klass) \
> +     OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_NATIVE)
> +#define XICS_NATIVE_GET_CLASS(obj) \
> +     OBJECT_CLASS_CHECK(XICSStateClass, (obj), TYPE_XICS_NATIVE)
> +
> +typedef struct XICSNative {
> +    XICSState parent_obj;
> +
> +    GHashTable *pir_table;
> +    MemoryRegion icp_mmio;
> +} XICSNative;
> +
>  #define TYPE_ICS_BASE "ics-base"
>  #define ICS_BASE(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_BASE)
>  
> @@ -210,4 +229,9 @@ void xics_hmp_info_pic(Monitor *mon, const QDict *qdict);
>  ICPState *xics_find_icp(XICSState *xics, int cpu_index);
>  void xics_insert_ics(XICSState *xics, ICSState *ics);
>  
> +typedef struct PnvChip PnvChip;
> +
> +void xics_native_populate_icp(PnvChip *chip, void *fdt, int offset,
> +                              uint32_t base, uint32_t count);
> +
>  #endif /* XICS_H */
Cédric Le Goater Oct. 14, 2016, 9:40 a.m. UTC | #2
On 10/14/2016 08:10 AM, David Gibson wrote:
> On Mon, Oct 03, 2016 at 09:24:51AM +0200, Cédric Le Goater wrote:
>> From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>>
>> This provides access to the MMIO based Interrupt Presentation
>> Controllers (ICP) as found on a POWER8 system.
>>
>> A new XICSNative class is introduced to hold the MMIO region of the
>> ICPs. It also makes use of a hash table to associate the ICPState of a
>> CPU with a HW processor id, as this is the server number presented in
>> the XIVEs.
>>
>> The class routine 'find_icp' provide the way to do the lookups when
>> needed in the XICS base class.
>>
>> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> [clg: - naming cleanups
>>       - replaced the use of xics_get_cpu_index_by_dt_id() by xics_find_icp()
>>       - added some qemu logging in case of error      
>>       - introduced a xics_native_find_icp routine to map icp index to
>>         cpu index      
>>       - moved sysbus mapping to chip ]
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>
>>  checkpatch complains on this one, but it seems to be a false positive :
>>  
>>  ERROR: spaces required around that '&' (ctx:WxV)
>>  #314: FILE: hw/intc/xics_native.c:246:
>>  +                        (gpointer) &xics->ss[cs->cpu_index]);
>>
>>  default-configs/ppc64-softmmu.mak |   3 +-
>>  hw/intc/Makefile.objs             |   1 +
>>  hw/intc/xics_native.c             | 327 ++++++++++++++++++++++++++++++++++++++
>>  include/hw/ppc/pnv.h              |  19 +++
>>  include/hw/ppc/xics.h             |  24 +++
>>  5 files changed, 373 insertions(+), 1 deletion(-)
>>  create mode 100644 hw/intc/xics_native.c
>>
>> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
>> index 67a9bcaa67fa..a22c93a48686 100644
>> --- a/default-configs/ppc64-softmmu.mak
>> +++ b/default-configs/ppc64-softmmu.mak
>> @@ -48,8 +48,9 @@ CONFIG_PLATFORM_BUS=y
>>  CONFIG_ETSEC=y
>>  CONFIG_LIBDECNUMBER=y
>>  # For pSeries
>> -CONFIG_XICS=$(CONFIG_PSERIES)
>> +CONFIG_XICS=$(or $(CONFIG_PSERIES),$(CONFIG_POWERNV))
>>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
>> +CONFIG_XICS_NATIVE=$(CONFIG_POWERNV)
>>  CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
>>  # For PReP
>>  CONFIG_MC146818RTC=y
>> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
>> index 05ec21b21e0e..7be5dfc8347b 100644
>> --- a/hw/intc/Makefile.objs
>> +++ b/hw/intc/Makefile.objs
>> @@ -31,6 +31,7 @@ obj-$(CONFIG_RASPI) += bcm2835_ic.o bcm2836_control.o
>>  obj-$(CONFIG_SH4) += sh_intc.o
>>  obj-$(CONFIG_XICS) += xics.o
>>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>> +obj-$(CONFIG_XICS_NATIVE) += xics_native.o
>>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
>> diff --git a/hw/intc/xics_native.c b/hw/intc/xics_native.c
>> new file mode 100644
>> index 000000000000..16413d807f65
>> --- /dev/null
>> +++ b/hw/intc/xics_native.c
>> @@ -0,0 +1,327 @@
>> +/*
>> + * QEMU PowerPC PowerNV machine model
>> + *
>> + * Native version of ICS/ICP
>> + *
>> + * Copyright (c) 2016, IBM Corporation.
>> + *
>> + * This library is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2 of the License, or (at your option) any later version.
>> + *
>> + * This library is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qapi/error.h"
>> +#include "qemu-common.h"
>> +#include "cpu.h"
>> +#include "hw/hw.h"
>> +#include "qemu/log.h"
>> +#include "qapi/error.h"
>> +
>> +#include "hw/ppc/fdt.h"
>> +#include "hw/ppc/xics.h"
>> +#include "hw/ppc/pnv.h"
>> +
>> +#include <libfdt.h>
>> +
>> +static void xics_native_reset(void *opaque)
>> +{
>> +    device_reset(DEVICE(opaque));
>> +}
>> +
>> +static void xics_native_initfn(Object *obj)
>> +{
>> +    XICSState *xics = XICS_COMMON(obj);
>> +
>> +    QLIST_INIT(&xics->ics);
> 
> This shouldn't be necessary, because it's done in the base class
> initfn (which is called before the subclass initfns).

True. This is a left over. 

>> +    /*
>> +     * Let's not forget to register a reset handler else the ICPs
>> +     * won't be initialized with the correct values. Trouble ahead !
>> +     */
>> +    qemu_register_reset(xics_native_reset, xics);
> 
> And. this shouldn't be necessary.  If you set dc->reset to the right
> thing in class_init (which the xics base class should already do) then
> the device model will automatically reset the device, you shouldn't
> need an extra reset handler.

ah ! That is what I thought but it seems that ->reset is only called when 
the device is under a bus. When creating devices with qdev_create(),
sysbus is used as a default but this is not the case for the xics_native 
instance. 

For the story, as the reset was not called, the ICP were bogus and xics 
was wrongly doing IPIs ... I pulled my hair on this.

>> +}
>> +
>> +static uint64_t xics_native_read(void *opaque, hwaddr addr, unsigned width)
>> +{
>> +    XICSState *s = opaque;
>> +    uint32_t cpu_id = (addr & (PNV_XICS_SIZE - 1)) >> 12;
>> +    bool byte0 = (width == 1 && (addr & 0x3) == 0);
>> +    uint64_t val = 0xffffffff;
>> +    ICPState *ss;
>> +
>> +    ss = xics_find_icp(s, cpu_id);
> 
> IIUC, cpu_id is the hardware id here... so why aren't you using the
> hash table you added to do this mapping?  

the lookup is done in the xics_find_icp() handler later.

> (although see comments elswhere, that I'm not sure that mapping belongs 
> within xics).

OK. I am open to any design :)

> Another option to avoid the lookup would be to register each icp page
> as a separate MR, then you could set the opaque pointer directly to ss
> instead of to the global xics.

Yes. This is much smarter. I like that !

In xics_native_realize(), we could loop on the array xics->ss and
create sub_memory regions for each ICP. cpu_index would be completely 
unused, which is even better.

We would need to change the API of the icp_set_*() calls to take 
an ICPState then, and not a XICSState as currently. The result would 
be more consistent I think but a backlink to the XICSState, or at 
least the ICSs, is needed for icp_resend().

What is your feeling on this ? before I start shuffling the code.

>> +    if (!ss) {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP server %d\n", cpu_id);
>> +        return val;
>> +    }
>> +
>> +    switch (addr & 0xffc) {
>> +    case 0: /* poll */
>> +        val = icp_ipoll(ss, NULL);
>> +        if (byte0) {
>> +            val >>= 24;
>> +        } else if (width != 4) {
>> +            goto bad_access;
>> +        }
>> +        break;
>> +    case 4: /* xirr */
>> +        if (byte0) {
>> +            val = icp_ipoll(ss, NULL) >> 24;
>> +        } else if (width == 4) {
>> +            val = icp_accept(ss);
>> +        } else {
>> +            goto bad_access;
>> +        }
>> +        break;
>> +    case 12:
>> +        if (byte0) {
>> +            val = ss->mfrr;
>> +        } else {
>> +            goto bad_access;
>> +        }
>> +        break;
>> +    case 16:
>> +        if (width == 4) {
>> +            val = ss->links[0];
>> +        } else {
>> +            goto bad_access;
>> +        }
>> +        break;
>> +    case 20:
>> +        if (width == 4) {
>> +            val = ss->links[1];
>> +        } else {
>> +            goto bad_access;
>> +        }
>> +        break;
>> +    case 24:
>> +        if (width == 4) {
>> +            val = ss->links[2];
>> +        } else {
>> +            goto bad_access;
>> +        }
>> +        break;
>> +    default:
>> +bad_access:
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP access 0x%"
>> +                      HWADDR_PRIx"/%d\n", addr, width);
>> +    }
>> +
>> +    return val;
>> +}
>> +
>> +static void xics_native_write(void *opaque, hwaddr addr, uint64_t val,
>> +                        unsigned width)
>> +{
>> +    XICSState *s = opaque;
>> +    uint32_t cpu_id = (addr & (PNV_XICS_SIZE - 1)) >> 12;
>> +    bool byte0 = (width == 1 && (addr & 0x3) == 0);
>> +    ICPState *ss;
>> +
>> +    ss = xics_find_icp(s, cpu_id);
>> +    if (!ss) {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP server %d\n", cpu_id);
>> +        return;
>> +    }
>> +
>> +    switch (addr & 0xffc) {
>> +    case 4: /* xirr */
>> +        if (byte0) {
>> +            icp_set_cppr(s, cpu_id, val);
>> +        } else if (width == 4) {
>> +            icp_eoi(s, cpu_id, val);
>> +        } else {
>> +            goto bad_access;
>> +        }
>> +        break;
>> +    case 12:
>> +        if (byte0) {
>> +            icp_set_mfrr(s, cpu_id, val);
>> +        } else {
>> +            goto bad_access;
>> +        }
>> +        break;
>> +    case 16:
>> +        if (width == 4) {
>> +            ss->links[0] = val;
>> +        } else {
>> +            goto bad_access;
>> +        }
>> +        break;
>> +    case 20:
>> +        if (width == 4) {
>> +            ss->links[1] = val;
>> +        } else {
>> +            goto bad_access;
>> +        }
>> +        break;
>> +    case 24:
>> +        if (width == 4) {
>> +            ss->links[2] = val;
>> +        } else {
>> +            goto bad_access;
>> +        }
>> +        break;
>> +    default:
>> +bad_access:
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP access 0x%"
>> +                      HWADDR_PRIx"/%d\n", addr, width);
>> +    }
>> +}
>> +
>> +static const MemoryRegionOps xics_native_ops = {
>> +    .read = xics_native_read,
>> +    .write = xics_native_write,
>> +    .valid.min_access_size = 1,
>> +    .valid.max_access_size = 4,
>> +    .impl.min_access_size = 1,
>> +    .impl.max_access_size = 4,
>> +    .endianness = DEVICE_BIG_ENDIAN,
>> +};
>> +
>> +static void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers,
>> +                                Error **errp)
>> +{
>> +    int i;
>> +
>> +    icp->nr_servers = nr_servers;
>> +
>> +    icp->ss = g_malloc0(icp->nr_servers * sizeof(ICPState));
>> +    for (i = 0; i < icp->nr_servers; i++) {
>> +        char name[32];
>> +        object_initialize(&icp->ss[i], sizeof(icp->ss[i]), TYPE_ICP);
>> +        snprintf(name, sizeof(name), "icp[%d]", i);
>> +        object_property_add_child(OBJECT(icp), name, OBJECT(&icp->ss[i]),
>> +                                  errp);
> 
> AFAICT this is identical to xics-spapr version and only difference to
> xics-kvm is it uses TYPE_KVM_ICP.  We should look at fusing these -
> maybe just having an icp typename in the xics class.

ok. I can add a preliminary patch for a common routine. 

>> +    }
>> +}
>> +
>> +static void xics_native_realize(DeviceState *dev, Error **errp)
>> +{
>> +    XICSState *xics = XICS_COMMON(dev);
>> +    XICSNative *xicsn = XICS_NATIVE(dev);
>> +    Error *error = NULL;
>> +    int i;
>> +
>> +    if (!xics->nr_servers) {
>> +        error_setg(errp, "Number of servers needs to be greater than 0");
>> +        return;
>> +    }
>> +
>> +    for (i = 0; i < xics->nr_servers; i++) {
>> +        object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
>> +                                 &error);
>> +        if (error) {
>> +            error_propagate(errp, error);
>> +            return;
>> +        }
>> +    }
>> +
>> +    xicsn->pir_table = g_hash_table_new(g_direct_hash, g_direct_equal);
> 
> I'm not sure having this map inside xics makes sense.  Wouldn't it be
> more widely useful to have a fast PIR -> cpu_index map at the machine
> level? 

well, I would rather find a way of not using the cpu_index at all. We 
have HW cpu ids and ICPState to link together, if we could avoid the 
extra indexing layer, that would be better. If not, I would rather hide 
it deeply for people not to start using something which is a work around
IMO.

> That could also use the structure of the hwids to do a fast
> lookup neatly (collapse core id with a small table, recombine with
> chip and thread id).  A hash table seems a big hammer to throw at it.

yes. It was easy to use to start with but hopefully we will get rid of
it with your idea on the memory regions.

Thanks,

C. 


>> +    /* Register MMIO regions */
>> +    memory_region_init_io(&xicsn->icp_mmio, OBJECT(dev), &xics_native_ops,
>> +                          xicsn, "xics", PNV_XICS_SIZE);
>> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xicsn->icp_mmio);
>> +}
>> +
>> +static void xics_native_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
>> +{
>> +    CPUState *cs = CPU(cpu);
>> +    CPUPPCState *env = &cpu->env;
>> +    XICSNative *xicsn = XICS_NATIVE(xics);
>> +
>> +    assert(cs->cpu_index < xics->nr_servers);
>> +    g_hash_table_insert(xicsn->pir_table, GINT_TO_POINTER(env->spr[SPR_PIR]),
>> +                        (gpointer) &xics->ss[cs->cpu_index]);
>> +}
>> +
>> +static ICPState *xics_native_find_icp(XICSState *xics, int pir)
>> +{
>> +    XICSNative *xicsn = XICS_NATIVE(xics);
>> +    gpointer key, value;
>> +    gboolean found = g_hash_table_lookup_extended(xicsn->pir_table,
>> +                                GINT_TO_POINTER(pir), &key, &value);
>> +
>> +    assert(found);
>> +
>> +    return (ICPState *) value;
>> +}
>> +
>> +static void xics_native_class_init(ObjectClass *oc, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(oc);
>> +    XICSStateClass *xsc = XICS_NATIVE_CLASS(oc);
>> +
>> +    dc->realize = xics_native_realize;
>> +    xsc->set_nr_servers = xics_set_nr_servers;
>> +    xsc->cpu_setup = xics_native_cpu_setup;
>> +    xsc->find_icp = xics_native_find_icp;
>> +}
>> +
>> +static const TypeInfo xics_native_info = {
>> +    .name          = TYPE_XICS_NATIVE,
>> +    .parent        = TYPE_XICS_COMMON,
>> +    .instance_size = sizeof(XICSNative),
>> +    .class_size = sizeof(XICSStateClass),
>> +    .class_init    = xics_native_class_init,
>> +    .instance_init = xics_native_initfn,
>> +};
>> +
>> +static void xics_native_register_types(void)
>> +{
>> +    type_register_static(&xics_native_info);
>> +}
>> +
>> +type_init(xics_native_register_types)
>> +
>> +void xics_native_populate_icp(PnvChip *chip, void *fdt, int offset,
>> +                              uint32_t pir, uint32_t count)
>> +{
>> +    uint64_t addr;
>> +    char *name;
>> +    const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
>> +    uint32_t irange[2], i, rsize;
>> +    uint64_t *reg;
>> +
>> +    /*
>> +     * TODO: add multichip ICP BAR
>> +     */
>> +    addr = PNV_XICS_BASE | (pir << 12);
>> +
>> +    irange[0] = cpu_to_be32(pir);
>> +    irange[1] = cpu_to_be32(count);
>> +
>> +    rsize = sizeof(uint64_t) * 2 * count;
>> +    reg = g_malloc(rsize);
>> +    for (i = 0; i < count; i++) {
>> +        reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000));
>> +        reg[i * 2 + 1] = cpu_to_be64(0x1000);
>> +    }
>> +
>> +    name = g_strdup_printf("interrupt-controller@%"PRIX64, addr);
>> +    offset = fdt_add_subnode(fdt, offset, name);
>> +    _FDT(offset);
>> +    g_free(name);
>> +
>> +    _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat))));
>> +    _FDT((fdt_setprop(fdt, offset, "reg", reg, rsize)));
>> +    _FDT((fdt_setprop_string(fdt, offset, "device_type",
>> +                              "PowerPC-External-Interrupt-Presentation")));
>> +    _FDT((fdt_setprop(fdt, offset, "interrupt-controller", NULL, 0)));
>> +    _FDT((fdt_setprop(fdt, offset, "ibm,interrupt-server-ranges",
>> +                       irange, sizeof(irange))));
>> +    _FDT((fdt_setprop_cell(fdt, offset, "#interrupt-cells", 1)));
>> +    _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0)));
>> +    g_free(reg);
>> +}
>> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
>> index 617c3fdd4f06..3f24b87d199b 100644
>> --- a/include/hw/ppc/pnv.h
>> +++ b/include/hw/ppc/pnv.h
>> @@ -125,4 +125,23 @@ typedef struct PnvMachineState {
>>  #define PNV_XSCOM_BASE(chip)                                            \
>>      (chip->xscom_base + ((uint64_t)(chip)->chip_id) * PNV_XSCOM_SIZE)
>>  
>> +/*
>> + * XSCOM 0x20109CA defines the ICP BAR:
>> + *
>> + * 0:29   : bits 14 to 43 of address to define 1 MB region.
>> + * 30     : 1 to enable ICP to receive loads/stores against its BAR region
>> + * 31:63  : Constant 0
>> + *
>> + * Usually defined as :
>> + *
>> + *      0xffffe00200000000 -> 0x0003ffff80000000
>> + *      0xffffe00600000000 -> 0x0003ffff80100000
>> + *      0xffffe02200000000 -> 0x0003ffff80800000
>> + *      0xffffe02600000000 -> 0x0003ffff80900000
>> + *
>> + * TODO: make a macro using the chip hw id ?
>> + */
>> +#define PNV_XICS_BASE         0x0003ffff80000000ull
>> +#define PNV_XICS_SIZE         0x0000000000100000ull
>> +
>>  #endif /* _PPC_PNV_H */
>> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
>> index dea9b92d4726..77ce786f000e 100644
>> --- a/include/hw/ppc/xics.h
>> +++ b/include/hw/ppc/xics.h
>> @@ -118,8 +118,27 @@ struct ICPState {
>>      uint8_t mfrr;
>>      qemu_irq output;
>>      bool cap_irq_xics_enabled;
>> +
>> +    /*
>> +     * for XICSNative (not used by Linux).
>> +     */
>> +    uint32_t links[3];
>>  };
>>  
>> +#define TYPE_XICS_NATIVE "xics-native"
>> +#define XICS_NATIVE(obj) OBJECT_CHECK(XICSNative, (obj), TYPE_XICS_NATIVE)
>> +#define XICS_NATIVE_CLASS(klass) \
>> +     OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_NATIVE)
>> +#define XICS_NATIVE_GET_CLASS(obj) \
>> +     OBJECT_CLASS_CHECK(XICSStateClass, (obj), TYPE_XICS_NATIVE)
>> +
>> +typedef struct XICSNative {
>> +    XICSState parent_obj;
>> +
>> +    GHashTable *pir_table;
>> +    MemoryRegion icp_mmio;
>> +} XICSNative;
>> +
>>  #define TYPE_ICS_BASE "ics-base"
>>  #define ICS_BASE(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_BASE)
>>  
>> @@ -210,4 +229,9 @@ void xics_hmp_info_pic(Monitor *mon, const QDict *qdict);
>>  ICPState *xics_find_icp(XICSState *xics, int cpu_index);
>>  void xics_insert_ics(XICSState *xics, ICSState *ics);
>>  
>> +typedef struct PnvChip PnvChip;
>> +
>> +void xics_native_populate_icp(PnvChip *chip, void *fdt, int offset,
>> +                              uint32_t base, uint32_t count);
>> +
>>  #endif /* XICS_H */
>
David Gibson Oct. 16, 2016, 11:51 p.m. UTC | #3
On Fri, Oct 14, 2016 at 11:40:30AM +0200, Cédric Le Goater wrote:
> On 10/14/2016 08:10 AM, David Gibson wrote:
> > On Mon, Oct 03, 2016 at 09:24:51AM +0200, Cédric Le Goater wrote:
> >> From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> >>
> >> This provides access to the MMIO based Interrupt Presentation
> >> Controllers (ICP) as found on a POWER8 system.
> >>
> >> A new XICSNative class is introduced to hold the MMIO region of the
> >> ICPs. It also makes use of a hash table to associate the ICPState of a
> >> CPU with a HW processor id, as this is the server number presented in
> >> the XIVEs.
> >>
> >> The class routine 'find_icp' provide the way to do the lookups when
> >> needed in the XICS base class.
> >>
> >> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> >> [clg: - naming cleanups
> >>       - replaced the use of xics_get_cpu_index_by_dt_id() by xics_find_icp()
> >>       - added some qemu logging in case of error      
> >>       - introduced a xics_native_find_icp routine to map icp index to
> >>         cpu index      
> >>       - moved sysbus mapping to chip ]
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>
> >>  checkpatch complains on this one, but it seems to be a false positive :
> >>  
> >>  ERROR: spaces required around that '&' (ctx:WxV)
> >>  #314: FILE: hw/intc/xics_native.c:246:
> >>  +                        (gpointer) &xics->ss[cs->cpu_index]);
> >>
> >>  default-configs/ppc64-softmmu.mak |   3 +-
> >>  hw/intc/Makefile.objs             |   1 +
> >>  hw/intc/xics_native.c             | 327 ++++++++++++++++++++++++++++++++++++++
> >>  include/hw/ppc/pnv.h              |  19 +++
> >>  include/hw/ppc/xics.h             |  24 +++
> >>  5 files changed, 373 insertions(+), 1 deletion(-)
> >>  create mode 100644 hw/intc/xics_native.c
> >>
> >> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
> >> index 67a9bcaa67fa..a22c93a48686 100644
> >> --- a/default-configs/ppc64-softmmu.mak
> >> +++ b/default-configs/ppc64-softmmu.mak
> >> @@ -48,8 +48,9 @@ CONFIG_PLATFORM_BUS=y
> >>  CONFIG_ETSEC=y
> >>  CONFIG_LIBDECNUMBER=y
> >>  # For pSeries
> >> -CONFIG_XICS=$(CONFIG_PSERIES)
> >> +CONFIG_XICS=$(or $(CONFIG_PSERIES),$(CONFIG_POWERNV))
> >>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
> >> +CONFIG_XICS_NATIVE=$(CONFIG_POWERNV)
> >>  CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
> >>  # For PReP
> >>  CONFIG_MC146818RTC=y
> >> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> >> index 05ec21b21e0e..7be5dfc8347b 100644
> >> --- a/hw/intc/Makefile.objs
> >> +++ b/hw/intc/Makefile.objs
> >> @@ -31,6 +31,7 @@ obj-$(CONFIG_RASPI) += bcm2835_ic.o bcm2836_control.o
> >>  obj-$(CONFIG_SH4) += sh_intc.o
> >>  obj-$(CONFIG_XICS) += xics.o
> >>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
> >> +obj-$(CONFIG_XICS_NATIVE) += xics_native.o
> >>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
> >>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
> >>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
> >> diff --git a/hw/intc/xics_native.c b/hw/intc/xics_native.c
> >> new file mode 100644
> >> index 000000000000..16413d807f65
> >> --- /dev/null
> >> +++ b/hw/intc/xics_native.c
> >> @@ -0,0 +1,327 @@
> >> +/*
> >> + * QEMU PowerPC PowerNV machine model
> >> + *
> >> + * Native version of ICS/ICP
> >> + *
> >> + * Copyright (c) 2016, IBM Corporation.
> >> + *
> >> + * This library is free software; you can redistribute it and/or
> >> + * modify it under the terms of the GNU Lesser General Public
> >> + * License as published by the Free Software Foundation; either
> >> + * version 2 of the License, or (at your option) any later version.
> >> + *
> >> + * This library is distributed in the hope that it will be useful,
> >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >> + * Lesser General Public License for more details.
> >> + *
> >> + * You should have received a copy of the GNU Lesser General Public
> >> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> >> + */
> >> +
> >> +#include "qemu/osdep.h"
> >> +#include "qapi/error.h"
> >> +#include "qemu-common.h"
> >> +#include "cpu.h"
> >> +#include "hw/hw.h"
> >> +#include "qemu/log.h"
> >> +#include "qapi/error.h"
> >> +
> >> +#include "hw/ppc/fdt.h"
> >> +#include "hw/ppc/xics.h"
> >> +#include "hw/ppc/pnv.h"
> >> +
> >> +#include <libfdt.h>
> >> +
> >> +static void xics_native_reset(void *opaque)
> >> +{
> >> +    device_reset(DEVICE(opaque));
> >> +}
> >> +
> >> +static void xics_native_initfn(Object *obj)
> >> +{
> >> +    XICSState *xics = XICS_COMMON(obj);
> >> +
> >> +    QLIST_INIT(&xics->ics);
> > 
> > This shouldn't be necessary, because it's done in the base class
> > initfn (which is called before the subclass initfns).
> 
> True. This is a left over. 
> 
> >> +    /*
> >> +     * Let's not forget to register a reset handler else the ICPs
> >> +     * won't be initialized with the correct values. Trouble ahead !
> >> +     */
> >> +    qemu_register_reset(xics_native_reset, xics);
> > 
> > And. this shouldn't be necessary.  If you set dc->reset to the right
> > thing in class_init (which the xics base class should already do) then
> > the device model will automatically reset the device, you shouldn't
> > need an extra reset handler.
> 
> ah ! That is what I thought but it seems that ->reset is only called when 
> the device is under a bus. When creating devices with qdev_create(),
> sysbus is used as a default but this is not the case for the xics_native 
> instance. 

Huh.. that's odd.  xics_native is a subclass of xics common which is a
subclass of sysbusdevice, which means it ought to appear on the
sysbus.

Ironically, spapr xics probably shouldn't be a sysbus device, because
it has no MMIO.  xics_native definitely should be though.

> For the story, as the reset was not called, the ICP were bogus and xics 
> was wrongly doing IPIs ... I pulled my hair on this.
> 
> >> +}
> >> +
> >> +static uint64_t xics_native_read(void *opaque, hwaddr addr, unsigned width)
> >> +{
> >> +    XICSState *s = opaque;
> >> +    uint32_t cpu_id = (addr & (PNV_XICS_SIZE - 1)) >> 12;
> >> +    bool byte0 = (width == 1 && (addr & 0x3) == 0);
> >> +    uint64_t val = 0xffffffff;
> >> +    ICPState *ss;
> >> +
> >> +    ss = xics_find_icp(s, cpu_id);
> > 
> > IIUC, cpu_id is the hardware id here... so why aren't you using the
> > hash table you added to do this mapping?  
> 
> the lookup is done in the xics_find_icp() handler later.

Ah, yes, that makes sense.

> > (although see comments elswhere, that I'm not sure that mapping belongs 
> > within xics).
> 
> OK. I am open to any design :)
> 
> > Another option to avoid the lookup would be to register each icp page
> > as a separate MR, then you could set the opaque pointer directly to ss
> > instead of to the global xics.
> 
> Yes. This is much smarter. I like that !
> 
> In xics_native_realize(), we could loop on the array xics->ss and
> create sub_memory regions for each ICP. cpu_index would be completely 
> unused, which is even better.
> 
> We would need to change the API of the icp_set_*() calls to take 
> an ICPState then, and not a XICSState as currently. The result would 
> be more consistent I think but a backlink to the XICSState, or at 
> least the ICSs, is needed for icp_resend().

> What is your feeling on this ? before I start shuffling the code.

Sounds good, I think it's worth adding the backlink for better
consistency across the pieces.

> >> +    if (!ss) {
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP server %d\n", cpu_id);
> >> +        return val;
> >> +    }
> >> +
> >> +    switch (addr & 0xffc) {
> >> +    case 0: /* poll */
> >> +        val = icp_ipoll(ss, NULL);
> >> +        if (byte0) {
> >> +            val >>= 24;
> >> +        } else if (width != 4) {
> >> +            goto bad_access;
> >> +        }
> >> +        break;
> >> +    case 4: /* xirr */
> >> +        if (byte0) {
> >> +            val = icp_ipoll(ss, NULL) >> 24;
> >> +        } else if (width == 4) {
> >> +            val = icp_accept(ss);
> >> +        } else {
> >> +            goto bad_access;
> >> +        }
> >> +        break;
> >> +    case 12:
> >> +        if (byte0) {
> >> +            val = ss->mfrr;
> >> +        } else {
> >> +            goto bad_access;
> >> +        }
> >> +        break;
> >> +    case 16:
> >> +        if (width == 4) {
> >> +            val = ss->links[0];
> >> +        } else {
> >> +            goto bad_access;
> >> +        }
> >> +        break;
> >> +    case 20:
> >> +        if (width == 4) {
> >> +            val = ss->links[1];
> >> +        } else {
> >> +            goto bad_access;
> >> +        }
> >> +        break;
> >> +    case 24:
> >> +        if (width == 4) {
> >> +            val = ss->links[2];
> >> +        } else {
> >> +            goto bad_access;
> >> +        }
> >> +        break;
> >> +    default:
> >> +bad_access:
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP access 0x%"
> >> +                      HWADDR_PRIx"/%d\n", addr, width);
> >> +    }
> >> +
> >> +    return val;
> >> +}
> >> +
> >> +static void xics_native_write(void *opaque, hwaddr addr, uint64_t val,
> >> +                        unsigned width)
> >> +{
> >> +    XICSState *s = opaque;
> >> +    uint32_t cpu_id = (addr & (PNV_XICS_SIZE - 1)) >> 12;
> >> +    bool byte0 = (width == 1 && (addr & 0x3) == 0);
> >> +    ICPState *ss;
> >> +
> >> +    ss = xics_find_icp(s, cpu_id);
> >> +    if (!ss) {
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP server %d\n", cpu_id);
> >> +        return;
> >> +    }
> >> +
> >> +    switch (addr & 0xffc) {
> >> +    case 4: /* xirr */
> >> +        if (byte0) {
> >> +            icp_set_cppr(s, cpu_id, val);
> >> +        } else if (width == 4) {
> >> +            icp_eoi(s, cpu_id, val);
> >> +        } else {
> >> +            goto bad_access;
> >> +        }
> >> +        break;
> >> +    case 12:
> >> +        if (byte0) {
> >> +            icp_set_mfrr(s, cpu_id, val);
> >> +        } else {
> >> +            goto bad_access;
> >> +        }
> >> +        break;
> >> +    case 16:
> >> +        if (width == 4) {
> >> +            ss->links[0] = val;
> >> +        } else {
> >> +            goto bad_access;
> >> +        }
> >> +        break;
> >> +    case 20:
> >> +        if (width == 4) {
> >> +            ss->links[1] = val;
> >> +        } else {
> >> +            goto bad_access;
> >> +        }
> >> +        break;
> >> +    case 24:
> >> +        if (width == 4) {
> >> +            ss->links[2] = val;
> >> +        } else {
> >> +            goto bad_access;
> >> +        }
> >> +        break;
> >> +    default:
> >> +bad_access:
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP access 0x%"
> >> +                      HWADDR_PRIx"/%d\n", addr, width);
> >> +    }
> >> +}
> >> +
> >> +static const MemoryRegionOps xics_native_ops = {
> >> +    .read = xics_native_read,
> >> +    .write = xics_native_write,
> >> +    .valid.min_access_size = 1,
> >> +    .valid.max_access_size = 4,
> >> +    .impl.min_access_size = 1,
> >> +    .impl.max_access_size = 4,
> >> +    .endianness = DEVICE_BIG_ENDIAN,
> >> +};
> >> +
> >> +static void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers,
> >> +                                Error **errp)
> >> +{
> >> +    int i;
> >> +
> >> +    icp->nr_servers = nr_servers;
> >> +
> >> +    icp->ss = g_malloc0(icp->nr_servers * sizeof(ICPState));
> >> +    for (i = 0; i < icp->nr_servers; i++) {
> >> +        char name[32];
> >> +        object_initialize(&icp->ss[i], sizeof(icp->ss[i]), TYPE_ICP);
> >> +        snprintf(name, sizeof(name), "icp[%d]", i);
> >> +        object_property_add_child(OBJECT(icp), name, OBJECT(&icp->ss[i]),
> >> +                                  errp);
> > 
> > AFAICT this is identical to xics-spapr version and only difference to
> > xics-kvm is it uses TYPE_KVM_ICP.  We should look at fusing these -
> > maybe just having an icp typename in the xics class.
> 
> ok. I can add a preliminary patch for a common routine. 
> 
> >> +    }
> >> +}
> >> +
> >> +static void xics_native_realize(DeviceState *dev, Error **errp)
> >> +{
> >> +    XICSState *xics = XICS_COMMON(dev);
> >> +    XICSNative *xicsn = XICS_NATIVE(dev);
> >> +    Error *error = NULL;
> >> +    int i;
> >> +
> >> +    if (!xics->nr_servers) {
> >> +        error_setg(errp, "Number of servers needs to be greater than 0");
> >> +        return;
> >> +    }
> >> +
> >> +    for (i = 0; i < xics->nr_servers; i++) {
> >> +        object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
> >> +                                 &error);
> >> +        if (error) {
> >> +            error_propagate(errp, error);
> >> +            return;
> >> +        }
> >> +    }
> >> +
> >> +    xicsn->pir_table = g_hash_table_new(g_direct_hash, g_direct_equal);
> > 
> > I'm not sure having this map inside xics makes sense.  Wouldn't it be
> > more widely useful to have a fast PIR -> cpu_index map at the machine
> > level? 
> 
> well, I would rather find a way of not using the cpu_index at all. We 
> have HW cpu ids and ICPState to link together, if we could avoid the 
> extra indexing layer, that would be better. If not, I would rather hide 
> it deeply for people not to start using something which is a work around
> IMO.
> 
> > That could also use the structure of the hwids to do a fast
> > lookup neatly (collapse core id with a small table, recombine with
> > chip and thread id).  A hash table seems a big hammer to throw at it.
> 
> yes. It was easy to use to start with but hopefully we will get rid of
> it with your idea on the memory regions.

Ok, let's see how it turns out.

> 
> Thanks,
> 
> C. 
> 
> 
> >> +    /* Register MMIO regions */
> >> +    memory_region_init_io(&xicsn->icp_mmio, OBJECT(dev), &xics_native_ops,
> >> +                          xicsn, "xics", PNV_XICS_SIZE);
> >> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xicsn->icp_mmio);
> >> +}
> >> +
> >> +static void xics_native_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
> >> +{
> >> +    CPUState *cs = CPU(cpu);
> >> +    CPUPPCState *env = &cpu->env;
> >> +    XICSNative *xicsn = XICS_NATIVE(xics);
> >> +
> >> +    assert(cs->cpu_index < xics->nr_servers);
> >> +    g_hash_table_insert(xicsn->pir_table, GINT_TO_POINTER(env->spr[SPR_PIR]),
> >> +                        (gpointer) &xics->ss[cs->cpu_index]);
> >> +}
> >> +
> >> +static ICPState *xics_native_find_icp(XICSState *xics, int pir)
> >> +{
> >> +    XICSNative *xicsn = XICS_NATIVE(xics);
> >> +    gpointer key, value;
> >> +    gboolean found = g_hash_table_lookup_extended(xicsn->pir_table,
> >> +                                GINT_TO_POINTER(pir), &key, &value);
> >> +
> >> +    assert(found);
> >> +
> >> +    return (ICPState *) value;
> >> +}
> >> +
> >> +static void xics_native_class_init(ObjectClass *oc, void *data)
> >> +{
> >> +    DeviceClass *dc = DEVICE_CLASS(oc);
> >> +    XICSStateClass *xsc = XICS_NATIVE_CLASS(oc);
> >> +
> >> +    dc->realize = xics_native_realize;
> >> +    xsc->set_nr_servers = xics_set_nr_servers;
> >> +    xsc->cpu_setup = xics_native_cpu_setup;
> >> +    xsc->find_icp = xics_native_find_icp;
> >> +}
> >> +
> >> +static const TypeInfo xics_native_info = {
> >> +    .name          = TYPE_XICS_NATIVE,
> >> +    .parent        = TYPE_XICS_COMMON,
> >> +    .instance_size = sizeof(XICSNative),
> >> +    .class_size = sizeof(XICSStateClass),
> >> +    .class_init    = xics_native_class_init,
> >> +    .instance_init = xics_native_initfn,
> >> +};
> >> +
> >> +static void xics_native_register_types(void)
> >> +{
> >> +    type_register_static(&xics_native_info);
> >> +}
> >> +
> >> +type_init(xics_native_register_types)
> >> +
> >> +void xics_native_populate_icp(PnvChip *chip, void *fdt, int offset,
> >> +                              uint32_t pir, uint32_t count)
> >> +{
> >> +    uint64_t addr;
> >> +    char *name;
> >> +    const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
> >> +    uint32_t irange[2], i, rsize;
> >> +    uint64_t *reg;
> >> +
> >> +    /*
> >> +     * TODO: add multichip ICP BAR
> >> +     */
> >> +    addr = PNV_XICS_BASE | (pir << 12);
> >> +
> >> +    irange[0] = cpu_to_be32(pir);
> >> +    irange[1] = cpu_to_be32(count);
> >> +
> >> +    rsize = sizeof(uint64_t) * 2 * count;
> >> +    reg = g_malloc(rsize);
> >> +    for (i = 0; i < count; i++) {
> >> +        reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000));
> >> +        reg[i * 2 + 1] = cpu_to_be64(0x1000);
> >> +    }
> >> +
> >> +    name = g_strdup_printf("interrupt-controller@%"PRIX64, addr);
> >> +    offset = fdt_add_subnode(fdt, offset, name);
> >> +    _FDT(offset);
> >> +    g_free(name);
> >> +
> >> +    _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat))));
> >> +    _FDT((fdt_setprop(fdt, offset, "reg", reg, rsize)));
> >> +    _FDT((fdt_setprop_string(fdt, offset, "device_type",
> >> +                              "PowerPC-External-Interrupt-Presentation")));
> >> +    _FDT((fdt_setprop(fdt, offset, "interrupt-controller", NULL, 0)));
> >> +    _FDT((fdt_setprop(fdt, offset, "ibm,interrupt-server-ranges",
> >> +                       irange, sizeof(irange))));
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "#interrupt-cells", 1)));
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0)));
> >> +    g_free(reg);
> >> +}
> >> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
> >> index 617c3fdd4f06..3f24b87d199b 100644
> >> --- a/include/hw/ppc/pnv.h
> >> +++ b/include/hw/ppc/pnv.h
> >> @@ -125,4 +125,23 @@ typedef struct PnvMachineState {
> >>  #define PNV_XSCOM_BASE(chip)                                            \
> >>      (chip->xscom_base + ((uint64_t)(chip)->chip_id) * PNV_XSCOM_SIZE)
> >>  
> >> +/*
> >> + * XSCOM 0x20109CA defines the ICP BAR:
> >> + *
> >> + * 0:29   : bits 14 to 43 of address to define 1 MB region.
> >> + * 30     : 1 to enable ICP to receive loads/stores against its BAR region
> >> + * 31:63  : Constant 0
> >> + *
> >> + * Usually defined as :
> >> + *
> >> + *      0xffffe00200000000 -> 0x0003ffff80000000
> >> + *      0xffffe00600000000 -> 0x0003ffff80100000
> >> + *      0xffffe02200000000 -> 0x0003ffff80800000
> >> + *      0xffffe02600000000 -> 0x0003ffff80900000
> >> + *
> >> + * TODO: make a macro using the chip hw id ?
> >> + */
> >> +#define PNV_XICS_BASE         0x0003ffff80000000ull
> >> +#define PNV_XICS_SIZE         0x0000000000100000ull
> >> +
> >>  #endif /* _PPC_PNV_H */
> >> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> >> index dea9b92d4726..77ce786f000e 100644
> >> --- a/include/hw/ppc/xics.h
> >> +++ b/include/hw/ppc/xics.h
> >> @@ -118,8 +118,27 @@ struct ICPState {
> >>      uint8_t mfrr;
> >>      qemu_irq output;
> >>      bool cap_irq_xics_enabled;
> >> +
> >> +    /*
> >> +     * for XICSNative (not used by Linux).
> >> +     */
> >> +    uint32_t links[3];
> >>  };
> >>  
> >> +#define TYPE_XICS_NATIVE "xics-native"
> >> +#define XICS_NATIVE(obj) OBJECT_CHECK(XICSNative, (obj), TYPE_XICS_NATIVE)
> >> +#define XICS_NATIVE_CLASS(klass) \
> >> +     OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_NATIVE)
> >> +#define XICS_NATIVE_GET_CLASS(obj) \
> >> +     OBJECT_CLASS_CHECK(XICSStateClass, (obj), TYPE_XICS_NATIVE)
> >> +
> >> +typedef struct XICSNative {
> >> +    XICSState parent_obj;
> >> +
> >> +    GHashTable *pir_table;
> >> +    MemoryRegion icp_mmio;
> >> +} XICSNative;
> >> +
> >>  #define TYPE_ICS_BASE "ics-base"
> >>  #define ICS_BASE(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_BASE)
> >>  
> >> @@ -210,4 +229,9 @@ void xics_hmp_info_pic(Monitor *mon, const QDict *qdict);
> >>  ICPState *xics_find_icp(XICSState *xics, int cpu_index);
> >>  void xics_insert_ics(XICSState *xics, ICSState *ics);
> >>  
> >> +typedef struct PnvChip PnvChip;
> >> +
> >> +void xics_native_populate_icp(PnvChip *chip, void *fdt, int offset,
> >> +                              uint32_t base, uint32_t count);
> >> +
> >>  #endif /* XICS_H */
> > 
>
diff mbox

Patch

diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index 67a9bcaa67fa..a22c93a48686 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -48,8 +48,9 @@  CONFIG_PLATFORM_BUS=y
 CONFIG_ETSEC=y
 CONFIG_LIBDECNUMBER=y
 # For pSeries
-CONFIG_XICS=$(CONFIG_PSERIES)
+CONFIG_XICS=$(or $(CONFIG_PSERIES),$(CONFIG_POWERNV))
 CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
+CONFIG_XICS_NATIVE=$(CONFIG_POWERNV)
 CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
 # For PReP
 CONFIG_MC146818RTC=y
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 05ec21b21e0e..7be5dfc8347b 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -31,6 +31,7 @@  obj-$(CONFIG_RASPI) += bcm2835_ic.o bcm2836_control.o
 obj-$(CONFIG_SH4) += sh_intc.o
 obj-$(CONFIG_XICS) += xics.o
 obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
+obj-$(CONFIG_XICS_NATIVE) += xics_native.o
 obj-$(CONFIG_XICS_KVM) += xics_kvm.o
 obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
 obj-$(CONFIG_S390_FLIC) += s390_flic.o
diff --git a/hw/intc/xics_native.c b/hw/intc/xics_native.c
new file mode 100644
index 000000000000..16413d807f65
--- /dev/null
+++ b/hw/intc/xics_native.c
@@ -0,0 +1,327 @@ 
+/*
+ * QEMU PowerPC PowerNV machine model
+ *
+ * Native version of ICS/ICP
+ *
+ * Copyright (c) 2016, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "cpu.h"
+#include "hw/hw.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/xics.h"
+#include "hw/ppc/pnv.h"
+
+#include <libfdt.h>
+
+static void xics_native_reset(void *opaque)
+{
+    device_reset(DEVICE(opaque));
+}
+
+static void xics_native_initfn(Object *obj)
+{
+    XICSState *xics = XICS_COMMON(obj);
+
+    QLIST_INIT(&xics->ics);
+
+    /*
+     * Let's not forget to register a reset handler else the ICPs
+     * won't be initialized with the correct values. Trouble ahead !
+     */
+    qemu_register_reset(xics_native_reset, xics);
+}
+
+static uint64_t xics_native_read(void *opaque, hwaddr addr, unsigned width)
+{
+    XICSState *s = opaque;
+    uint32_t cpu_id = (addr & (PNV_XICS_SIZE - 1)) >> 12;
+    bool byte0 = (width == 1 && (addr & 0x3) == 0);
+    uint64_t val = 0xffffffff;
+    ICPState *ss;
+
+    ss = xics_find_icp(s, cpu_id);
+    if (!ss) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP server %d\n", cpu_id);
+        return val;
+    }
+
+    switch (addr & 0xffc) {
+    case 0: /* poll */
+        val = icp_ipoll(ss, NULL);
+        if (byte0) {
+            val >>= 24;
+        } else if (width != 4) {
+            goto bad_access;
+        }
+        break;
+    case 4: /* xirr */
+        if (byte0) {
+            val = icp_ipoll(ss, NULL) >> 24;
+        } else if (width == 4) {
+            val = icp_accept(ss);
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 12:
+        if (byte0) {
+            val = ss->mfrr;
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 16:
+        if (width == 4) {
+            val = ss->links[0];
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 20:
+        if (width == 4) {
+            val = ss->links[1];
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 24:
+        if (width == 4) {
+            val = ss->links[2];
+        } else {
+            goto bad_access;
+        }
+        break;
+    default:
+bad_access:
+        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP access 0x%"
+                      HWADDR_PRIx"/%d\n", addr, width);
+    }
+
+    return val;
+}
+
+static void xics_native_write(void *opaque, hwaddr addr, uint64_t val,
+                        unsigned width)
+{
+    XICSState *s = opaque;
+    uint32_t cpu_id = (addr & (PNV_XICS_SIZE - 1)) >> 12;
+    bool byte0 = (width == 1 && (addr & 0x3) == 0);
+    ICPState *ss;
+
+    ss = xics_find_icp(s, cpu_id);
+    if (!ss) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP server %d\n", cpu_id);
+        return;
+    }
+
+    switch (addr & 0xffc) {
+    case 4: /* xirr */
+        if (byte0) {
+            icp_set_cppr(s, cpu_id, val);
+        } else if (width == 4) {
+            icp_eoi(s, cpu_id, val);
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 12:
+        if (byte0) {
+            icp_set_mfrr(s, cpu_id, val);
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 16:
+        if (width == 4) {
+            ss->links[0] = val;
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 20:
+        if (width == 4) {
+            ss->links[1] = val;
+        } else {
+            goto bad_access;
+        }
+        break;
+    case 24:
+        if (width == 4) {
+            ss->links[2] = val;
+        } else {
+            goto bad_access;
+        }
+        break;
+    default:
+bad_access:
+        qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP access 0x%"
+                      HWADDR_PRIx"/%d\n", addr, width);
+    }
+}
+
+static const MemoryRegionOps xics_native_ops = {
+    .read = xics_native_read,
+    .write = xics_native_write,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 4,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void xics_set_nr_servers(XICSState *icp, uint32_t nr_servers,
+                                Error **errp)
+{
+    int i;
+
+    icp->nr_servers = nr_servers;
+
+    icp->ss = g_malloc0(icp->nr_servers * sizeof(ICPState));
+    for (i = 0; i < icp->nr_servers; i++) {
+        char name[32];
+        object_initialize(&icp->ss[i], sizeof(icp->ss[i]), TYPE_ICP);
+        snprintf(name, sizeof(name), "icp[%d]", i);
+        object_property_add_child(OBJECT(icp), name, OBJECT(&icp->ss[i]),
+                                  errp);
+    }
+}
+
+static void xics_native_realize(DeviceState *dev, Error **errp)
+{
+    XICSState *xics = XICS_COMMON(dev);
+    XICSNative *xicsn = XICS_NATIVE(dev);
+    Error *error = NULL;
+    int i;
+
+    if (!xics->nr_servers) {
+        error_setg(errp, "Number of servers needs to be greater than 0");
+        return;
+    }
+
+    for (i = 0; i < xics->nr_servers; i++) {
+        object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
+                                 &error);
+        if (error) {
+            error_propagate(errp, error);
+            return;
+        }
+    }
+
+    xicsn->pir_table = g_hash_table_new(g_direct_hash, g_direct_equal);
+
+    /* Register MMIO regions */
+    memory_region_init_io(&xicsn->icp_mmio, OBJECT(dev), &xics_native_ops,
+                          xicsn, "xics", PNV_XICS_SIZE);
+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xicsn->icp_mmio);
+}
+
+static void xics_native_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
+{
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+    XICSNative *xicsn = XICS_NATIVE(xics);
+
+    assert(cs->cpu_index < xics->nr_servers);
+    g_hash_table_insert(xicsn->pir_table, GINT_TO_POINTER(env->spr[SPR_PIR]),
+                        (gpointer) &xics->ss[cs->cpu_index]);
+}
+
+static ICPState *xics_native_find_icp(XICSState *xics, int pir)
+{
+    XICSNative *xicsn = XICS_NATIVE(xics);
+    gpointer key, value;
+    gboolean found = g_hash_table_lookup_extended(xicsn->pir_table,
+                                GINT_TO_POINTER(pir), &key, &value);
+
+    assert(found);
+
+    return (ICPState *) value;
+}
+
+static void xics_native_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    XICSStateClass *xsc = XICS_NATIVE_CLASS(oc);
+
+    dc->realize = xics_native_realize;
+    xsc->set_nr_servers = xics_set_nr_servers;
+    xsc->cpu_setup = xics_native_cpu_setup;
+    xsc->find_icp = xics_native_find_icp;
+}
+
+static const TypeInfo xics_native_info = {
+    .name          = TYPE_XICS_NATIVE,
+    .parent        = TYPE_XICS_COMMON,
+    .instance_size = sizeof(XICSNative),
+    .class_size = sizeof(XICSStateClass),
+    .class_init    = xics_native_class_init,
+    .instance_init = xics_native_initfn,
+};
+
+static void xics_native_register_types(void)
+{
+    type_register_static(&xics_native_info);
+}
+
+type_init(xics_native_register_types)
+
+void xics_native_populate_icp(PnvChip *chip, void *fdt, int offset,
+                              uint32_t pir, uint32_t count)
+{
+    uint64_t addr;
+    char *name;
+    const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
+    uint32_t irange[2], i, rsize;
+    uint64_t *reg;
+
+    /*
+     * TODO: add multichip ICP BAR
+     */
+    addr = PNV_XICS_BASE | (pir << 12);
+
+    irange[0] = cpu_to_be32(pir);
+    irange[1] = cpu_to_be32(count);
+
+    rsize = sizeof(uint64_t) * 2 * count;
+    reg = g_malloc(rsize);
+    for (i = 0; i < count; i++) {
+        reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000));
+        reg[i * 2 + 1] = cpu_to_be64(0x1000);
+    }
+
+    name = g_strdup_printf("interrupt-controller@%"PRIX64, addr);
+    offset = fdt_add_subnode(fdt, offset, name);
+    _FDT(offset);
+    g_free(name);
+
+    _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat))));
+    _FDT((fdt_setprop(fdt, offset, "reg", reg, rsize)));
+    _FDT((fdt_setprop_string(fdt, offset, "device_type",
+                              "PowerPC-External-Interrupt-Presentation")));
+    _FDT((fdt_setprop(fdt, offset, "interrupt-controller", NULL, 0)));
+    _FDT((fdt_setprop(fdt, offset, "ibm,interrupt-server-ranges",
+                       irange, sizeof(irange))));
+    _FDT((fdt_setprop_cell(fdt, offset, "#interrupt-cells", 1)));
+    _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0)));
+    g_free(reg);
+}
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index 617c3fdd4f06..3f24b87d199b 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -125,4 +125,23 @@  typedef struct PnvMachineState {
 #define PNV_XSCOM_BASE(chip)                                            \
     (chip->xscom_base + ((uint64_t)(chip)->chip_id) * PNV_XSCOM_SIZE)
 
+/*
+ * XSCOM 0x20109CA defines the ICP BAR:
+ *
+ * 0:29   : bits 14 to 43 of address to define 1 MB region.
+ * 30     : 1 to enable ICP to receive loads/stores against its BAR region
+ * 31:63  : Constant 0
+ *
+ * Usually defined as :
+ *
+ *      0xffffe00200000000 -> 0x0003ffff80000000
+ *      0xffffe00600000000 -> 0x0003ffff80100000
+ *      0xffffe02200000000 -> 0x0003ffff80800000
+ *      0xffffe02600000000 -> 0x0003ffff80900000
+ *
+ * TODO: make a macro using the chip hw id ?
+ */
+#define PNV_XICS_BASE         0x0003ffff80000000ull
+#define PNV_XICS_SIZE         0x0000000000100000ull
+
 #endif /* _PPC_PNV_H */
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index dea9b92d4726..77ce786f000e 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -118,8 +118,27 @@  struct ICPState {
     uint8_t mfrr;
     qemu_irq output;
     bool cap_irq_xics_enabled;
+
+    /*
+     * for XICSNative (not used by Linux).
+     */
+    uint32_t links[3];
 };
 
+#define TYPE_XICS_NATIVE "xics-native"
+#define XICS_NATIVE(obj) OBJECT_CHECK(XICSNative, (obj), TYPE_XICS_NATIVE)
+#define XICS_NATIVE_CLASS(klass) \
+     OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_NATIVE)
+#define XICS_NATIVE_GET_CLASS(obj) \
+     OBJECT_CLASS_CHECK(XICSStateClass, (obj), TYPE_XICS_NATIVE)
+
+typedef struct XICSNative {
+    XICSState parent_obj;
+
+    GHashTable *pir_table;
+    MemoryRegion icp_mmio;
+} XICSNative;
+
 #define TYPE_ICS_BASE "ics-base"
 #define ICS_BASE(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_BASE)
 
@@ -210,4 +229,9 @@  void xics_hmp_info_pic(Monitor *mon, const QDict *qdict);
 ICPState *xics_find_icp(XICSState *xics, int cpu_index);
 void xics_insert_ics(XICSState *xics, ICSState *ics);
 
+typedef struct PnvChip PnvChip;
+
+void xics_native_populate_icp(PnvChip *chip, void *fdt, int offset,
+                              uint32_t base, uint32_t count);
+
 #endif /* XICS_H */