Patchwork [RFC,v2,6/6] hw: arm_gic_kvm: Add KVM VGIC save/restore logic

login
register
mail settings
Submitter Christoffer Dall
Date Sept. 26, 2013, 9:03 p.m.
Message ID <1380229386-24166-7-git-send-email-christoffer.dall@linaro.org>
Download mbox | patch
Permalink /patch/278280/
State New
Headers show

Comments

Christoffer Dall - Sept. 26, 2013, 9:03 p.m.
Save and restore the ARM KVM VGIC state from the kernel.  We rely on
QEMU to marshal the GICState data structure and therefore simply
synchronize the kernel state with the QEMU emulated state in both
directions.

We take some care on the restore path to check the VGIC has been
configured with enough IRQs and CPU interfaces that we can properly
restore the state, and for separate set/clear registers we first fully
clear the registers and then set the required bits.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>

Changelog [v2]:
 - Remove num_irq from GIC VMstate structure
 - Increment GIC VMstate version number
 - Use extract32/deposit32 for bit-field modifications
 - Address other smaller review comments
 - Renames kvm_arm_gic_dist_[readr/writer] functions to
   kvm_dist_[get/put] and shortened other function names
 - Use concrete format for APRn
---
 hw/intc/arm_gic_common.c |    5 +-
 hw/intc/arm_gic_kvm.c    |  424 +++++++++++++++++++++++++++++++++++++++++++++-
 hw/intc/gic_internal.h   |    8 +
 3 files changed, 433 insertions(+), 4 deletions(-)
alex.bennee@linaro.org - Sept. 27, 2013, 8:11 a.m.
christoffer.dall@linaro.org writes:

> Save and restore the ARM KVM VGIC state from the kernel.  We rely on
<snip>
>  
>  static const VMStateDescription vmstate_gic = {
>      .name = "arm_gic",
> -    .version_id = 6,
> -    .minimum_version_id = 6,
> +    .version_id = 7,
> +    .minimum_version_id = 7,
>      .pre_save = gic_pre_save,
>      .post_load = gic_post_load,
>      .fields = (VMStateField[]) {

Does this mean QEMU and Kernel need to be kept in lock-step for
compatibility?

>  
> +//#define DEBUG_GIC_KVM
> +
> +#ifdef DEBUG_GIC_KVM
> +static const int debug_gic_kvm = 1;
> +#else
> +static const int debug_gic_kvm = 0;
> +#endif
> +
> +#define DPRINTF(fmt, ...) do { \
> +        if (debug_gic_kvm) { \
> +            printf("arm_gic: " fmt , ## __VA_ARGS__); \
> +        } \
> +    } while (0)
> +

Shouldn't we be using QEMU logging framework for this? Also for the
fprintfs later on.

>  #define TYPE_KVM_ARM_GIC "kvm-arm-gic"
>  #define KVM_ARM_GIC(obj) \
>       OBJECT_CHECK(GICState, (obj), TYPE_KVM_ARM_GIC)
> @@ -72,14 +87,419 @@ static void kvm_arm_gic_set_irq(void *opaque, int irq, int level)
>      kvm_set_irq(kvm_state, kvm_irq, !!level);
>  }
>  
> +static bool kvm_arm_gic_can_save_restore(GICState *s)
> +{
> +    return s->dev_fd >= 0;
> +}
> +
> +static void kvm_gic_access(GICState *s, int group, int offset,
> +                                   int cpu, uint32_t *val, bool write)
> +{
> +    struct kvm_device_attr attr;
> +    int type;
> +    int err;
> +
> +    cpu = cpu & 0xff;
> +
> +    attr.flags = 0;
> +    attr.group = group;
> +    attr.attr = (((uint64_t)cpu << KVM_DEV_ARM_VGIC_CPUID_SHIFT) &
> +                 KVM_DEV_ARM_VGIC_CPUID_MASK) |
> +                (((uint64_t)offset << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) &
> +                 KVM_DEV_ARM_VGIC_OFFSET_MASK);
> +    attr.addr = (uintptr_t)val;
> +
> +    if (write) {
> +        type = KVM_SET_DEVICE_ATTR;
> +    } else {
> +        type = KVM_GET_DEVICE_ATTR;
> +    }
> +
> +    err = kvm_device_ioctl(s->dev_fd, type, &attr);
> +    if (err < 0) {
> +            fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
> +                    strerror(-err));
> +            abort();
> +    }
> +}
> +
> +static void kvm_gicd_access(GICState *s, int offset, int cpu,
> +                            uint32_t *val, bool write)
> +{
> +    kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
> +                   offset, cpu, val, write);
> +}
> +
> +static void kvm_gicc_access(GICState *s, int offset, int cpu,
> +                            uint32_t *val, bool write)
> +{
> +    kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
> +                   offset, cpu, val, write);
> +}
> +
> +#define for_each_irq_reg(_ctr, _max_irq, _field_width) \
> +    for (_ctr = 0; _ctr < ((_max_irq) / (32 / (_field_width))); _ctr++)
> +
> +/*
> + * Translate from the in-kernel field for an IRQ value to/from the qemu
> + * representation.
> + */
> +typedef void (*vgic_translate_fn)(GICState *s, int irq, int cpu,
> +                                  uint32_t *field, bool to_kernel);
> +
> +/* synthetic translate function used for clear/set registers to completely
> + * clear a setting using a clear-register before setting the remaing bits
> + * using a set-register */
> +static void translate_clear(GICState *s, int irq, int cpu,
> +                            uint32_t *field, bool to_kernel)
> +{
> +    if (to_kernel) {
> +        *field = ~0;
> +    } else {
> +        /* does not make sense: qemu model doesn't use set/clear regs */
> +        abort();
> +    }
> +}
> +
> +static void translate_enabled(GICState *s, int irq, int cpu,
> +                              uint32_t *field, bool to_kernel)
> +{
> +    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
> +
> +    if (to_kernel) {
> +        *field = GIC_TEST_ENABLED(irq, cm);
> +    } else {
> +        if (*field & 1) {
> +            GIC_SET_ENABLED(irq, cm);
> +        }
> +    }
> +}
> +
> +static void translate_pending(GICState *s, int irq, int cpu,
> +                              uint32_t *field, bool to_kernel)
> +{
> +    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
> +
> +    if (to_kernel) {
> +        *field = GIC_TEST_PENDING(irq, cm);
> +    } else {
> +        if (*field & 1) {
> +            GIC_SET_PENDING(irq, cm);
> +            /* TODO: Capture is level-line is held high in the kernel */
> +        }
> +    }
> +}
> +
> +static void translate_active(GICState *s, int irq, int cpu,
> +                             uint32_t *field, bool to_kernel)
> +{
> +    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
> +
> +    if (to_kernel) {
> +        *field = GIC_TEST_ACTIVE(irq, cm);
> +    } else {
> +        if (*field & 1) {

Should 1, 0x2 etc be #define'd constants?

<snip>
>  static void kvm_arm_gic_put(GICState *s)
>  {
> -    /* TODO: there isn't currently a kernel interface to set the GIC state */
> +    uint32_t reg;
> +    int i;
> +    int cpu;
> +    int num_cpu;
> +    int num_irq;
> +
> +    if (!kvm_arm_gic_can_save_restore(s)) {
> +            DPRINTF("Cannot put kernel gic state, no kernel interface");
> +            return;
> +    }
> +
> +    /* Note: We do the restore in a slightly different order than the save
> +     * (where the order doesn't matter and is simply ordered according to the
> +     * register offset values */
> +
> +    /*****************************************************************
> +     * Distributor State
> +     */
> +
> +    /* s->enabled -> GICD_CTLR */
> +    reg = s->enabled;
> +    kvm_gicd_access(s, 0x0, 0, &reg, true);
> +
> +    /* Sanity checking on GICD_TYPER and s->num_irq, s->num_cpu */
> +    kvm_gicd_access(s, 0x4, 0, &reg, false);
> +    num_irq = ((reg & 0x1f) + 1) * 32;
> +    num_cpu = ((reg & 0xe0) >> 5) + 1;
> +
> +    if (num_irq < s->num_irq) {
> +            fprintf(stderr, "Restoring %u IRQs, but kernel supports max %d\n",
> +                    s->num_irq, num_irq);
> +            abort();
> +    } else if (num_cpu != s->num_cpu ) {
> +            fprintf(stderr, "Restoring %u CPU interfaces, kernel only has %d\n",
> +                    s->num_cpu, num_cpu);
> +            /* Did we not create the VCPUs in the kernel yet? */
> +            abort();
> +    }
> +
> +    /* TODO: Consider checking compatibility with the IIDR ? */
> +
> +    /* irq_state[n].enabled -> GICD_ISENABLERn */
> +    kvm_dist_put(s, 0x180, 1, s->num_irq, translate_clear);
> +    kvm_dist_put(s, 0x100, 1, s->num_irq, translate_enabled);
> +
> +    /* s->irq_target[irq] -> GICD_ITARGETSRn
> +     * (restore targets before pending to ensure the pending state is set on
> +     * the appropriate CPU interfaces in the kernel) */
> +    kvm_dist_put(s, 0x800, 8, s->num_irq, translate_targets);
> +
> +    /* irq_state[n].pending + irq_state[n].level -> GICD_ISPENDRn */
> +    kvm_dist_put(s, 0x280, 1, s->num_irq, translate_clear);
> +    kvm_dist_put(s, 0x200, 1, s->num_irq, translate_pending);
> +
> +    /* irq_state[n].active -> GICD_ISACTIVERn */
> +    kvm_dist_put(s, 0x380, 1, s->num_irq, translate_clear);
> +    kvm_dist_put(s, 0x300, 1, s->num_irq, translate_active);
> +
> +    /* irq_state[n].trigger -> GICD_ICFRn */
> +    kvm_dist_put(s, 0xc00, 2, s->num_irq, translate_trigger);
> +
> +    /* s->priorityX[irq] -> ICD_IPRIORITYRn */
> +    kvm_dist_put(s, 0x400, 8, s->num_irq, translate_priority);
> +
> +    /* s->sgi_source -> ICD_CPENDSGIRn */
> +    kvm_dist_put(s, 0xf10, 8, GIC_NR_SGIS, translate_clear);
> +    kvm_dist_put(s, 0xf20, 8, GIC_NR_SGIS, translate_sgisource);

Again what are these magic numbers? I assume the comments refer to the
actual reg names in the manual? Perhaps putting a reference to the
manual if these values aren't to be used else where?

<snip>
Peter Maydell - Oct. 15, 2013, 10:35 a.m.
On 27 September 2013 09:11, Alex Bennée <alex.bennee@linaro.org> wrote:
>
> christoffer.dall@linaro.org writes:
>
>> Save and restore the ARM KVM VGIC state from the kernel.  We rely on
> <snip>
>>
>>  static const VMStateDescription vmstate_gic = {
>>      .name = "arm_gic",
>> -    .version_id = 6,
>> -    .minimum_version_id = 6,
>> +    .version_id = 7,
>> +    .minimum_version_id = 7,
>>      .pre_save = gic_pre_save,
>>      .post_load = gic_post_load,
>>      .fields = (VMStateField[]) {
>
> Does this mean QEMU and Kernel need to be kept in lock-step for
> compatibility?

No. This patch is a little confusing because it's both adding
the new fields and also adding the save/restore support, but
once we have the data structures and vmstate in QEMU holding
all the state the kernel needs, there shouldn't be any need
to update the vmstate in a backwards-incompatible way.

>>
>> +//#define DEBUG_GIC_KVM
>> +
>> +#ifdef DEBUG_GIC_KVM
>> +static const int debug_gic_kvm = 1;
>> +#else
>> +static const int debug_gic_kvm = 0;
>> +#endif
>> +
>> +#define DPRINTF(fmt, ...) do { \
>> +        if (debug_gic_kvm) { \
>> +            printf("arm_gic: " fmt , ## __VA_ARGS__); \
>> +        } \
>> +    } while (0)
>> +
>
> Shouldn't we be using QEMU logging framework for this? Also for the
> fprintfs later on.

No, these are debug printfs, not things which would be interesting
to the average user/person trying to debug a guest. We don't
have a particularly clean framework for compile time
enabled debug printfs, so 'some random macro in each individual
file' is the current approach.

-- PMM
Peter Maydell - Oct. 15, 2013, 11:15 a.m.
On 26 September 2013 22:03, Christoffer Dall
<christoffer.dall@linaro.org> wrote:
> Save and restore the ARM KVM VGIC state from the kernel.  We rely on
> QEMU to marshal the GICState data structure and therefore simply
> synchronize the kernel state with the QEMU emulated state in both
> directions.
>
> We take some care on the restore path to check the VGIC has been
> configured with enough IRQs and CPU interfaces that we can properly
> restore the state, and for separate set/clear registers we first fully
> clear the registers and then set the required bits.
>
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
>
> Changelog [v2]:
>  - Remove num_irq from GIC VMstate structure
>  - Increment GIC VMstate version number
>  - Use extract32/deposit32 for bit-field modifications
>  - Address other smaller review comments
>  - Renames kvm_arm_gic_dist_[readr/writer] functions to
>    kvm_dist_[get/put] and shortened other function names
>  - Use concrete format for APRn
> ---
>  hw/intc/arm_gic_common.c |    5 +-
>  hw/intc/arm_gic_kvm.c    |  424 +++++++++++++++++++++++++++++++++++++++++++++-
>  hw/intc/gic_internal.h   |    8 +
>  3 files changed, 433 insertions(+), 4 deletions(-)
>
> diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
> index 5449d77..1d3b738 100644
> --- a/hw/intc/arm_gic_common.c
> +++ b/hw/intc/arm_gic_common.c
> @@ -58,8 +58,8 @@ static const VMStateDescription vmstate_gic_irq_state = {
>
>  static const VMStateDescription vmstate_gic = {
>      .name = "arm_gic",
> -    .version_id = 6,
> -    .minimum_version_id = 6,
> +    .version_id = 7,
> +    .minimum_version_id = 7,
>      .pre_save = gic_pre_save,
>      .post_load = gic_post_load,
>      .fields = (VMStateField[]) {
> @@ -78,6 +78,7 @@ static const VMStateDescription vmstate_gic = {
>          VMSTATE_UINT16_ARRAY(current_pending, GICState, NCPU),
>          VMSTATE_UINT8_ARRAY(bpr, GICState, NCPU),
>          VMSTATE_UINT8_ARRAY(abpr, GICState, NCPU),
> +        VMSTATE_UINT32_2DARRAY(apr, GICState, 4, NCPU),

I feel like we should add this new apr state (plus some
documentation and at least the TCG read/write interface
to the state) in one patch and then put the save/load
in its own patch.

> +    err = kvm_device_ioctl(s->dev_fd, type, &attr);
> +    if (err < 0) {
> +            fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
> +                    strerror(-err));
> +            abort();

Bad indent.

> +    }
> +}
>  static void kvm_arm_gic_put(GICState *s)
>  {
> -    /* TODO: there isn't currently a kernel interface to set the GIC state */
> +    uint32_t reg;
> +    int i;
> +    int cpu;
> +    int num_cpu;
> +    int num_irq;
> +
> +    if (!kvm_arm_gic_can_save_restore(s)) {
> +            DPRINTF("Cannot put kernel gic state, no kernel interface");
> +            return;
> +    }
> +
> +    /* Note: We do the restore in a slightly different order than the save
> +     * (where the order doesn't matter and is simply ordered according to the
> +     * register offset values */
> +
> +    /*****************************************************************
> +     * Distributor State
> +     */
> +
> +    /* s->enabled -> GICD_CTLR */
> +    reg = s->enabled;
> +    kvm_gicd_access(s, 0x0, 0, &reg, true);
> +
> +    /* Sanity checking on GICD_TYPER and s->num_irq, s->num_cpu */
> +    kvm_gicd_access(s, 0x4, 0, &reg, false);
> +    num_irq = ((reg & 0x1f) + 1) * 32;
> +    num_cpu = ((reg & 0xe0) >> 5) + 1;
> +
> +    if (num_irq < s->num_irq) {
> +            fprintf(stderr, "Restoring %u IRQs, but kernel supports max %d\n",
> +                    s->num_irq, num_irq);
> +            abort();
> +    } else if (num_cpu != s->num_cpu ) {
> +            fprintf(stderr, "Restoring %u CPU interfaces, kernel only has %d\n",
> +                    s->num_cpu, num_cpu);
> +            /* Did we not create the VCPUs in the kernel yet? */
> +            abort();
> +    }
> +
> +    /* TODO: Consider checking compatibility with the IIDR ? */
> +
> +    /* irq_state[n].enabled -> GICD_ISENABLERn */
> +    kvm_dist_put(s, 0x180, 1, s->num_irq, translate_clear);
> +    kvm_dist_put(s, 0x100, 1, s->num_irq, translate_enabled);
> +
> +    /* s->irq_target[irq] -> GICD_ITARGETSRn
> +     * (restore targets before pending to ensure the pending state is set on
> +     * the appropriate CPU interfaces in the kernel) */
> +    kvm_dist_put(s, 0x800, 8, s->num_irq, translate_targets);
> +
> +    /* irq_state[n].pending + irq_state[n].level -> GICD_ISPENDRn */
> +    kvm_dist_put(s, 0x280, 1, s->num_irq, translate_clear);
> +    kvm_dist_put(s, 0x200, 1, s->num_irq, translate_pending);
> +
> +    /* irq_state[n].active -> GICD_ISACTIVERn */
> +    kvm_dist_put(s, 0x380, 1, s->num_irq, translate_clear);
> +    kvm_dist_put(s, 0x300, 1, s->num_irq, translate_active);
> +
> +    /* irq_state[n].trigger -> GICD_ICFRn */
> +    kvm_dist_put(s, 0xc00, 2, s->num_irq, translate_trigger);
> +
> +    /* s->priorityX[irq] -> ICD_IPRIORITYRn */
> +    kvm_dist_put(s, 0x400, 8, s->num_irq, translate_priority);
> +
> +    /* s->sgi_source -> ICD_CPENDSGIRn */
> +    kvm_dist_put(s, 0xf10, 8, GIC_NR_SGIS, translate_clear);
> +    kvm_dist_put(s, 0xf20, 8, GIC_NR_SGIS, translate_sgisource);
> +
> +
> +    /*****************************************************************
> +     * CPU Interface(s) State
> +     */
> +
> +    for (cpu = 0; cpu < s->num_cpu; cpu++) {
> +        /* s->cpu_enabled[cpu] -> GICC_CTLR */
> +        reg = s->cpu_enabled[cpu];
> +        kvm_gicc_access(s, 0x00, cpu, &reg, true);
> +
> +        /* s->priority_mask[cpu] -> GICC_PMR */
> +        reg = (s->priority_mask[cpu] & 0xff);
> +        kvm_gicc_access(s, 0x04, cpu, &reg, true);
> +
> +        /* s->bpr[cpu] -> GICC_BPR */
> +        reg = (s->bpr[cpu] & 0x7);
> +        kvm_gicc_access(s, 0x08, cpu, &reg, true);
> +
> +        /* s->abpr[cpu] -> GICC_ABPR */
> +        reg = (s->abpr[cpu] & 0x7);
> +        kvm_gicc_access(s, 0x1c, cpu, &reg, true);
> +
> +        /* s->apr[n][cpu] -> GICC_APRn */
> +        for (i = 0; i < 4; i++) {
> +            reg = s->apr[i][cpu];
> +            kvm_gicc_access(s, 0xd0 + i * 4, cpu, &reg, true);
> +        }
> +    }
>  }

So an interesting question here is "is there state we currently
save/restore for the TCG GIC which we're not passing to the
kernel, and if so why?". I think the fields we don't do anything
with are:
 gic_irq_state::model
 GICState::last_active
 GICState::running_irq
 GICState::running_priority
 GICState::current_pending

I'm guessing these are a mix of "kernel doesn't implement
something" and "TCG model is wrong and this state should
show up somewhere else"; but which is which?
> diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h
> index 758b85a..f9133b9 100644
> --- a/hw/intc/gic_internal.h
> +++ b/hw/intc/gic_internal.h
> @@ -99,6 +99,14 @@ typedef struct GICState {
>      uint8_t  bpr[NCPU];
>      uint8_t  abpr[NCPU];
>
> +    /* The APR is implementation defined, so we choose a layout identical to
> +     * the KVM ABI layout for QEMU's implementation of the gic:
> +     * If one or more interrupts for preemption level X are active, then
> +     *   APRn[X mod 32] == 0b1,  where n = X / 32
> +     * otherwise the bit is clear.
> +     */
> +    uint32_t apr[4][NCPU];

You can delete the "or more" -- there can onyl be one
interrupt active at each group priority (GICv2 spec 3.3.3).

Also, where does the hardcoded 4 come from? Answer: it's
the worst-case number of GICC_APRn registers, which happens
if you have 7 bits of group priority, which means you have
128 preemption levels, which at one bit per level fits into
4 32 bit APR regs. I think a GIC_NR_APRS #define might be nice.

> +
>      uint32_t num_cpu;
>
>      MemoryRegion iomem; /* Distributor */
> --
> 1.7.10.4

thanks
-- PMM
Christoffer Dall - Nov. 19, 2013, 3:50 a.m.
On Fri, Sep 27, 2013 at 09:11:18AM +0100, Alex Bennée wrote:
> 
> christoffer.dall@linaro.org writes:
> 

[...]

> > +
> > +static void translate_active(GICState *s, int irq, int cpu,
> > +                             uint32_t *field, bool to_kernel)
> > +{
> > +    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
> > +
> > +    if (to_kernel) {
> > +        *field = GIC_TEST_ACTIVE(irq, cm);
> > +    } else {
> > +        if (*field & 1) {
> 
> Should 1, 0x2 etc be #define'd constants?
> 
> <snip>

It would be a little tedious.  The whole bunch of emulation files for
the GIC here are sort of predicated on the fact that you know how to
find the GIC manual and can see the register encodings and determine the
offsets.

There's also very little reuse for these, as this is the only place
where we translate the qemu-specific data structure format to the KVM
API format.

That being said, I wouldn't mind cleaning up a lot of this code and
adding defines for the register offsets would at least be very helpful,
but for now I'm sticking with the current style, trying to get this code
finished, and then I'll follow up with a cleanup patch set, which takes
care of the whole lot - not just the KVM interface specific bits.

> >  static void kvm_arm_gic_put(GICState *s)
> >  {
> > -    /* TODO: there isn't currently a kernel interface to set the GIC state */
> > +    uint32_t reg;
> > +    int i;
> > +    int cpu;
> > +    int num_cpu;
> > +    int num_irq;
> > +
> > +    if (!kvm_arm_gic_can_save_restore(s)) {
> > +            DPRINTF("Cannot put kernel gic state, no kernel interface");
> > +            return;
> > +    }
> > +
> > +    /* Note: We do the restore in a slightly different order than the save
> > +     * (where the order doesn't matter and is simply ordered according to the
> > +     * register offset values */
> > +
> > +    /*****************************************************************
> > +     * Distributor State
> > +     */
> > +
> > +    /* s->enabled -> GICD_CTLR */
> > +    reg = s->enabled;
> > +    kvm_gicd_access(s, 0x0, 0, &reg, true);
> > +
> > +    /* Sanity checking on GICD_TYPER and s->num_irq, s->num_cpu */
> > +    kvm_gicd_access(s, 0x4, 0, &reg, false);
> > +    num_irq = ((reg & 0x1f) + 1) * 32;
> > +    num_cpu = ((reg & 0xe0) >> 5) + 1;
> > +
> > +    if (num_irq < s->num_irq) {
> > +            fprintf(stderr, "Restoring %u IRQs, but kernel supports max %d\n",
> > +                    s->num_irq, num_irq);
> > +            abort();
> > +    } else if (num_cpu != s->num_cpu ) {
> > +            fprintf(stderr, "Restoring %u CPU interfaces, kernel only has %d\n",
> > +                    s->num_cpu, num_cpu);
> > +            /* Did we not create the VCPUs in the kernel yet? */
> > +            abort();
> > +    }
> > +
> > +    /* TODO: Consider checking compatibility with the IIDR ? */
> > +
> > +    /* irq_state[n].enabled -> GICD_ISENABLERn */
> > +    kvm_dist_put(s, 0x180, 1, s->num_irq, translate_clear);
> > +    kvm_dist_put(s, 0x100, 1, s->num_irq, translate_enabled);
> > +
> > +    /* s->irq_target[irq] -> GICD_ITARGETSRn
> > +     * (restore targets before pending to ensure the pending state is set on
> > +     * the appropriate CPU interfaces in the kernel) */
> > +    kvm_dist_put(s, 0x800, 8, s->num_irq, translate_targets);
> > +
> > +    /* irq_state[n].pending + irq_state[n].level -> GICD_ISPENDRn */
> > +    kvm_dist_put(s, 0x280, 1, s->num_irq, translate_clear);
> > +    kvm_dist_put(s, 0x200, 1, s->num_irq, translate_pending);
> > +
> > +    /* irq_state[n].active -> GICD_ISACTIVERn */
> > +    kvm_dist_put(s, 0x380, 1, s->num_irq, translate_clear);
> > +    kvm_dist_put(s, 0x300, 1, s->num_irq, translate_active);
> > +
> > +    /* irq_state[n].trigger -> GICD_ICFRn */
> > +    kvm_dist_put(s, 0xc00, 2, s->num_irq, translate_trigger);
> > +
> > +    /* s->priorityX[irq] -> ICD_IPRIORITYRn */
> > +    kvm_dist_put(s, 0x400, 8, s->num_irq, translate_priority);
> > +
> > +    /* s->sgi_source -> ICD_CPENDSGIRn */
> > +    kvm_dist_put(s, 0xf10, 8, GIC_NR_SGIS, translate_clear);
> > +    kvm_dist_put(s, 0xf20, 8, GIC_NR_SGIS, translate_sgisource);
> 
> Again what are these magic numbers? I assume the comments refer to the
> actual reg names in the manual? Perhaps putting a reference to the
> manual if these values aren't to be used else where?
> 
These are offsets (as indicated by the parameter name in kvm_dist_put)
for the corresponding registers, and yes, the comments are the reg names
used in the manual.  As I said above, you're lost here if you're not
looking at the manual while looking at the code, but I really do want to
clean up this part and share the offsets with arm_gic.c later, add nice
references to the manual and a whole bunch of other stuff that could be
made much nicer, but I'd really like to get this functional piece of
code in first.

Thanks,
-Christoffer
Christoffer Dall - Nov. 19, 2013, 4:17 a.m.
On Tue, Oct 15, 2013 at 12:15:03PM +0100, Peter Maydell wrote:
> On 26 September 2013 22:03, Christoffer Dall
> <christoffer.dall@linaro.org> wrote:
> > Save and restore the ARM KVM VGIC state from the kernel.  We rely on
> > QEMU to marshal the GICState data structure and therefore simply
> > synchronize the kernel state with the QEMU emulated state in both
> > directions.
> >
> > We take some care on the restore path to check the VGIC has been
> > configured with enough IRQs and CPU interfaces that we can properly
> > restore the state, and for separate set/clear registers we first fully
> > clear the registers and then set the required bits.
> >
> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> >
> > Changelog [v2]:
> >  - Remove num_irq from GIC VMstate structure
> >  - Increment GIC VMstate version number
> >  - Use extract32/deposit32 for bit-field modifications
> >  - Address other smaller review comments
> >  - Renames kvm_arm_gic_dist_[readr/writer] functions to
> >    kvm_dist_[get/put] and shortened other function names
> >  - Use concrete format for APRn
> > ---
> >  hw/intc/arm_gic_common.c |    5 +-
> >  hw/intc/arm_gic_kvm.c    |  424 +++++++++++++++++++++++++++++++++++++++++++++-
> >  hw/intc/gic_internal.h   |    8 +
> >  3 files changed, 433 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
> > index 5449d77..1d3b738 100644
> > --- a/hw/intc/arm_gic_common.c
> > +++ b/hw/intc/arm_gic_common.c
> > @@ -58,8 +58,8 @@ static const VMStateDescription vmstate_gic_irq_state = {
> >
> >  static const VMStateDescription vmstate_gic = {
> >      .name = "arm_gic",
> > -    .version_id = 6,
> > -    .minimum_version_id = 6,
> > +    .version_id = 7,
> > +    .minimum_version_id = 7,
> >      .pre_save = gic_pre_save,
> >      .post_load = gic_post_load,
> >      .fields = (VMStateField[]) {
> > @@ -78,6 +78,7 @@ static const VMStateDescription vmstate_gic = {
> >          VMSTATE_UINT16_ARRAY(current_pending, GICState, NCPU),
> >          VMSTATE_UINT8_ARRAY(bpr, GICState, NCPU),
> >          VMSTATE_UINT8_ARRAY(abpr, GICState, NCPU),
> > +        VMSTATE_UINT32_2DARRAY(apr, GICState, 4, NCPU),
> 
> I feel like we should add this new apr state (plus some
> documentation and at least the TCG read/write interface
> to the state) in one patch and then put the save/load
> in its own patch.
> 

Sounds reasonable, I've added a separate patch for the next series.

> > +    err = kvm_device_ioctl(s->dev_fd, type, &attr);
> > +    if (err < 0) {
> > +            fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
> > +                    strerror(-err));
> > +            abort();
> 
> Bad indent.
> 

indeed, sorry about that.

> > +    }
> > +}
> >  static void kvm_arm_gic_put(GICState *s)
> >  {
> > -    /* TODO: there isn't currently a kernel interface to set the GIC state */
> > +    uint32_t reg;
> > +    int i;
> > +    int cpu;
> > +    int num_cpu;
> > +    int num_irq;
> > +
> > +    if (!kvm_arm_gic_can_save_restore(s)) {
> > +            DPRINTF("Cannot put kernel gic state, no kernel interface");
> > +            return;
> > +    }
> > +
> > +    /* Note: We do the restore in a slightly different order than the save
> > +     * (where the order doesn't matter and is simply ordered according to the
> > +     * register offset values */
> > +
> > +    /*****************************************************************
> > +     * Distributor State
> > +     */
> > +
> > +    /* s->enabled -> GICD_CTLR */
> > +    reg = s->enabled;
> > +    kvm_gicd_access(s, 0x0, 0, &reg, true);
> > +
> > +    /* Sanity checking on GICD_TYPER and s->num_irq, s->num_cpu */
> > +    kvm_gicd_access(s, 0x4, 0, &reg, false);
> > +    num_irq = ((reg & 0x1f) + 1) * 32;
> > +    num_cpu = ((reg & 0xe0) >> 5) + 1;
> > +
> > +    if (num_irq < s->num_irq) {
> > +            fprintf(stderr, "Restoring %u IRQs, but kernel supports max %d\n",
> > +                    s->num_irq, num_irq);
> > +            abort();
> > +    } else if (num_cpu != s->num_cpu ) {
> > +            fprintf(stderr, "Restoring %u CPU interfaces, kernel only has %d\n",
> > +                    s->num_cpu, num_cpu);
> > +            /* Did we not create the VCPUs in the kernel yet? */
> > +            abort();
> > +    }
> > +
> > +    /* TODO: Consider checking compatibility with the IIDR ? */
> > +
> > +    /* irq_state[n].enabled -> GICD_ISENABLERn */
> > +    kvm_dist_put(s, 0x180, 1, s->num_irq, translate_clear);
> > +    kvm_dist_put(s, 0x100, 1, s->num_irq, translate_enabled);
> > +
> > +    /* s->irq_target[irq] -> GICD_ITARGETSRn
> > +     * (restore targets before pending to ensure the pending state is set on
> > +     * the appropriate CPU interfaces in the kernel) */
> > +    kvm_dist_put(s, 0x800, 8, s->num_irq, translate_targets);
> > +
> > +    /* irq_state[n].pending + irq_state[n].level -> GICD_ISPENDRn */
> > +    kvm_dist_put(s, 0x280, 1, s->num_irq, translate_clear);
> > +    kvm_dist_put(s, 0x200, 1, s->num_irq, translate_pending);
> > +
> > +    /* irq_state[n].active -> GICD_ISACTIVERn */
> > +    kvm_dist_put(s, 0x380, 1, s->num_irq, translate_clear);
> > +    kvm_dist_put(s, 0x300, 1, s->num_irq, translate_active);
> > +
> > +    /* irq_state[n].trigger -> GICD_ICFRn */
> > +    kvm_dist_put(s, 0xc00, 2, s->num_irq, translate_trigger);
> > +
> > +    /* s->priorityX[irq] -> ICD_IPRIORITYRn */
> > +    kvm_dist_put(s, 0x400, 8, s->num_irq, translate_priority);
> > +
> > +    /* s->sgi_source -> ICD_CPENDSGIRn */
> > +    kvm_dist_put(s, 0xf10, 8, GIC_NR_SGIS, translate_clear);
> > +    kvm_dist_put(s, 0xf20, 8, GIC_NR_SGIS, translate_sgisource);
> > +
> > +
> > +    /*****************************************************************
> > +     * CPU Interface(s) State
> > +     */
> > +
> > +    for (cpu = 0; cpu < s->num_cpu; cpu++) {
> > +        /* s->cpu_enabled[cpu] -> GICC_CTLR */
> > +        reg = s->cpu_enabled[cpu];
> > +        kvm_gicc_access(s, 0x00, cpu, &reg, true);
> > +
> > +        /* s->priority_mask[cpu] -> GICC_PMR */
> > +        reg = (s->priority_mask[cpu] & 0xff);
> > +        kvm_gicc_access(s, 0x04, cpu, &reg, true);
> > +
> > +        /* s->bpr[cpu] -> GICC_BPR */
> > +        reg = (s->bpr[cpu] & 0x7);
> > +        kvm_gicc_access(s, 0x08, cpu, &reg, true);
> > +
> > +        /* s->abpr[cpu] -> GICC_ABPR */
> > +        reg = (s->abpr[cpu] & 0x7);
> > +        kvm_gicc_access(s, 0x1c, cpu, &reg, true);
> > +
> > +        /* s->apr[n][cpu] -> GICC_APRn */
> > +        for (i = 0; i < 4; i++) {
> > +            reg = s->apr[i][cpu];
> > +            kvm_gicc_access(s, 0xd0 + i * 4, cpu, &reg, true);
> > +        }
> > +    }
> >  }
> 
> So an interesting question here is "is there state we currently
> save/restore for the TCG GIC which we're not passing to the
> kernel, and if so why?". I think the fields we don't do anything
> with are:
>  gic_irq_state::model

the model field specifies if the interrup uses the 1:N or the N:N model.
In reasonably recent versions of the GIC all SGIs are N:N and all
peripheral interrupts are 1:N and it is therefore a static property for
a GIC v2.0 that we don't need to save and restore.  QEMU's model does
seem to emulate an older version and a newer version at the same time,
and this logic is slightly broken for the ICFGRn handling, but this
would require a discussion of exactly what QEMU should be supporting and
what we may break if we decide to support one thing vs. another.

>  GICState::last_active
>  GICState::running_irq
>  GICState::running_priority

As far as I understand the code, these three are used to implement the
running priority support on the QEMU emulated CPU interface side.  This
state is implemented in hardware for the virtual CPU interface for the
VGIC based on the information in the list registers and the
GICD_ISACTIVER and registers.  I think the QEMU code code be changed to
look at the active state of the registers and with a coherent CPU
affinity model should be able to reproduce this state on the fly without
keeping these extra state variables around, but I'm not 100% confident.

>  GICState::current_pending

Again, the virtual CPU register generates this state based on the LRs
present on the interface.  The in-kernel distributor emulation needs to
choose the right interrupts to queue based on priority if prorities were
to be supported in the kernel.  The necessary state, however, is
preserved through the registers that we do save an restore.

> 
> I'm guessing these are a mix of "kernel doesn't implement
> something" and "TCG model is wrong and this state should
> show up somewhere else"; but which is which?

So I think the answer to all of the above is that this is implementation
state that QEMU carries because it makes it more efficient/easier or
just because that was the way it was written, and the kernel does
maintain this state via the current registers being saved and restored.

This of course implies that the QEMU state is somewhat redundant, but
this may be on purpose.

Do you feel it is necessary for this sort of documentation to go into
the source somewhere, or would you just like to clear it up for the
review?


> > diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h
> > index 758b85a..f9133b9 100644
> > --- a/hw/intc/gic_internal.h
> > +++ b/hw/intc/gic_internal.h
> > @@ -99,6 +99,14 @@ typedef struct GICState {
> >      uint8_t  bpr[NCPU];
> >      uint8_t  abpr[NCPU];
> >
> > +    /* The APR is implementation defined, so we choose a layout identical to
> > +     * the KVM ABI layout for QEMU's implementation of the gic:
> > +     * If one or more interrupts for preemption level X are active, then
> > +     *   APRn[X mod 32] == 0b1,  where n = X / 32
> > +     * otherwise the bit is clear.
> > +     */
> > +    uint32_t apr[4][NCPU];
> 
> You can delete the "or more" -- there can onyl be one
> interrupt active at each group priority (GICv2 spec 3.3.3).
> 
> Also, where does the hardcoded 4 come from? Answer: it's
> the worst-case number of GICC_APRn registers, which happens
> if you have 7 bits of group priority, which means you have
> 128 preemption levels, which at one bit per level fits into
> 4 32 bit APR regs. I think a GIC_NR_APRS #define might be nice.
> 

Fair enough, I added one in the separate patch based on a new define for
max number of preemption levels.

> > +
> >      uint32_t num_cpu;
> >
> >      MemoryRegion iomem; /* Distributor */
> > --
> > 1.7.10.4

Thanks for the review!
-Christoffer

Patch

diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
index 5449d77..1d3b738 100644
--- a/hw/intc/arm_gic_common.c
+++ b/hw/intc/arm_gic_common.c
@@ -58,8 +58,8 @@  static const VMStateDescription vmstate_gic_irq_state = {
 
 static const VMStateDescription vmstate_gic = {
     .name = "arm_gic",
-    .version_id = 6,
-    .minimum_version_id = 6,
+    .version_id = 7,
+    .minimum_version_id = 7,
     .pre_save = gic_pre_save,
     .post_load = gic_post_load,
     .fields = (VMStateField[]) {
@@ -78,6 +78,7 @@  static const VMStateDescription vmstate_gic = {
         VMSTATE_UINT16_ARRAY(current_pending, GICState, NCPU),
         VMSTATE_UINT8_ARRAY(bpr, GICState, NCPU),
         VMSTATE_UINT8_ARRAY(abpr, GICState, NCPU),
+        VMSTATE_UINT32_2DARRAY(apr, GICState, 4, NCPU),
         VMSTATE_END_OF_LIST()
     }
 };
diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index 158f047..1510c4d 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -3,6 +3,7 @@ 
  *
  * Copyright (c) 2012 Linaro Limited
  * Written by Peter Maydell
+ * Save/Restore logic added by Christoffer Dall.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -23,6 +24,20 @@ 
 #include "kvm_arm.h"
 #include "gic_internal.h"
 
+//#define DEBUG_GIC_KVM
+
+#ifdef DEBUG_GIC_KVM
+static const int debug_gic_kvm = 1;
+#else
+static const int debug_gic_kvm = 0;
+#endif
+
+#define DPRINTF(fmt, ...) do { \
+        if (debug_gic_kvm) { \
+            printf("arm_gic: " fmt , ## __VA_ARGS__); \
+        } \
+    } while (0)
+
 #define TYPE_KVM_ARM_GIC "kvm-arm-gic"
 #define KVM_ARM_GIC(obj) \
      OBJECT_CHECK(GICState, (obj), TYPE_KVM_ARM_GIC)
@@ -72,14 +87,419 @@  static void kvm_arm_gic_set_irq(void *opaque, int irq, int level)
     kvm_set_irq(kvm_state, kvm_irq, !!level);
 }
 
+static bool kvm_arm_gic_can_save_restore(GICState *s)
+{
+    return s->dev_fd >= 0;
+}
+
+static void kvm_gic_access(GICState *s, int group, int offset,
+                                   int cpu, uint32_t *val, bool write)
+{
+    struct kvm_device_attr attr;
+    int type;
+    int err;
+
+    cpu = cpu & 0xff;
+
+    attr.flags = 0;
+    attr.group = group;
+    attr.attr = (((uint64_t)cpu << KVM_DEV_ARM_VGIC_CPUID_SHIFT) &
+                 KVM_DEV_ARM_VGIC_CPUID_MASK) |
+                (((uint64_t)offset << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) &
+                 KVM_DEV_ARM_VGIC_OFFSET_MASK);
+    attr.addr = (uintptr_t)val;
+
+    if (write) {
+        type = KVM_SET_DEVICE_ATTR;
+    } else {
+        type = KVM_GET_DEVICE_ATTR;
+    }
+
+    err = kvm_device_ioctl(s->dev_fd, type, &attr);
+    if (err < 0) {
+            fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
+                    strerror(-err));
+            abort();
+    }
+}
+
+static void kvm_gicd_access(GICState *s, int offset, int cpu,
+                            uint32_t *val, bool write)
+{
+    kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+                   offset, cpu, val, write);
+}
+
+static void kvm_gicc_access(GICState *s, int offset, int cpu,
+                            uint32_t *val, bool write)
+{
+    kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+                   offset, cpu, val, write);
+}
+
+#define for_each_irq_reg(_ctr, _max_irq, _field_width) \
+    for (_ctr = 0; _ctr < ((_max_irq) / (32 / (_field_width))); _ctr++)
+
+/*
+ * Translate from the in-kernel field for an IRQ value to/from the qemu
+ * representation.
+ */
+typedef void (*vgic_translate_fn)(GICState *s, int irq, int cpu,
+                                  uint32_t *field, bool to_kernel);
+
+/* synthetic translate function used for clear/set registers to completely
+ * clear a setting using a clear-register before setting the remaing bits
+ * using a set-register */
+static void translate_clear(GICState *s, int irq, int cpu,
+                            uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = ~0;
+    } else {
+        /* does not make sense: qemu model doesn't use set/clear regs */
+        abort();
+    }
+}
+
+static void translate_enabled(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
+
+    if (to_kernel) {
+        *field = GIC_TEST_ENABLED(irq, cm);
+    } else {
+        if (*field & 1) {
+            GIC_SET_ENABLED(irq, cm);
+        }
+    }
+}
+
+static void translate_pending(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
+
+    if (to_kernel) {
+        *field = GIC_TEST_PENDING(irq, cm);
+    } else {
+        if (*field & 1) {
+            GIC_SET_PENDING(irq, cm);
+            /* TODO: Capture is level-line is held high in the kernel */
+        }
+    }
+}
+
+static void translate_active(GICState *s, int irq, int cpu,
+                             uint32_t *field, bool to_kernel)
+{
+    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
+
+    if (to_kernel) {
+        *field = GIC_TEST_ACTIVE(irq, cm);
+    } else {
+        if (*field & 1) {
+            GIC_SET_ACTIVE(irq, cm);
+        }
+    }
+}
+
+static void translate_trigger(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = (GIC_TEST_TRIGGER(irq)) ? 0x2 : 0x0;
+    } else {
+        if (*field & 0x2) {
+            GIC_SET_TRIGGER(irq);
+        }
+    }
+}
+
+static void translate_priority(GICState *s, int irq, int cpu,
+                               uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = GIC_GET_PRIORITY(irq, cpu) & 0xff;
+    } else {
+        gic_set_priority(s, cpu, irq, *field & 0xff);
+    }
+}
+
+static void translate_targets(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = s->irq_target[irq] & 0xff;
+    } else {
+        s->irq_target[irq] = *field & 0xff;
+    }
+}
+
+static void translate_sgisource(GICState *s, int irq, int cpu,
+                                uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = s->sgi_source[irq][cpu] & 0xff;
+    } else {
+        s->sgi_source[irq][cpu] = *field & 0xff;
+    }
+}
+
+/* Read a register group from the kernel VGIC */
+static void kvm_dist_get(GICState *s, uint32_t offset, int width,
+                         int maxirq, vgic_translate_fn translate_fn)
+{
+    uint32_t reg;
+    int i;
+    int j;
+    int irq;
+    int cpu;
+    int regsz = 32 / width; /* irqs per kernel register */
+    uint32_t field;
+
+    for_each_irq_reg(i, maxirq, width) {
+        irq = i * regsz;
+        cpu = 0;
+        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
+            kvm_gicd_access(s, offset, cpu, &reg, false);
+            for (j = 0; j < regsz; j++) {
+                field = extract32(reg, j * width, width);
+                translate_fn(s, irq + j, cpu, &field, false);
+            }
+
+            cpu++;
+        }
+        offset += 4;
+    }
+}
+
+/* Write a register group to the kernel VGIC */
+static void kvm_dist_put(GICState *s, uint32_t offset, int width,
+                         int maxirq, vgic_translate_fn translate_fn)
+{
+    uint32_t reg;
+    int i;
+    int j;
+    int irq;
+    int cpu;
+    int regsz = 32 / width; /* irqs per kernel register */
+    uint32_t field;
+
+    for_each_irq_reg(i, maxirq, width) {
+        irq = i * regsz;
+        cpu = 0;
+        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
+            reg = 0;
+            for (j = 0; j < regsz; j++) {
+                translate_fn(s, irq + j, cpu, &field, true);
+                reg = deposit32(reg, j * width, width, field);
+            }
+            kvm_gicd_access(s, offset, cpu, &reg, true);
+
+            cpu++;
+        }
+        offset += 4;
+    }
+}
+
 static void kvm_arm_gic_put(GICState *s)
 {
-    /* TODO: there isn't currently a kernel interface to set the GIC state */
+    uint32_t reg;
+    int i;
+    int cpu;
+    int num_cpu;
+    int num_irq;
+
+    if (!kvm_arm_gic_can_save_restore(s)) {
+            DPRINTF("Cannot put kernel gic state, no kernel interface");
+            return;
+    }
+
+    /* Note: We do the restore in a slightly different order than the save
+     * (where the order doesn't matter and is simply ordered according to the
+     * register offset values */
+
+    /*****************************************************************
+     * Distributor State
+     */
+
+    /* s->enabled -> GICD_CTLR */
+    reg = s->enabled;
+    kvm_gicd_access(s, 0x0, 0, &reg, true);
+
+    /* Sanity checking on GICD_TYPER and s->num_irq, s->num_cpu */
+    kvm_gicd_access(s, 0x4, 0, &reg, false);
+    num_irq = ((reg & 0x1f) + 1) * 32;
+    num_cpu = ((reg & 0xe0) >> 5) + 1;
+
+    if (num_irq < s->num_irq) {
+            fprintf(stderr, "Restoring %u IRQs, but kernel supports max %d\n",
+                    s->num_irq, num_irq);
+            abort();
+    } else if (num_cpu != s->num_cpu ) {
+            fprintf(stderr, "Restoring %u CPU interfaces, kernel only has %d\n",
+                    s->num_cpu, num_cpu);
+            /* Did we not create the VCPUs in the kernel yet? */
+            abort();
+    }
+
+    /* TODO: Consider checking compatibility with the IIDR ? */
+
+    /* irq_state[n].enabled -> GICD_ISENABLERn */
+    kvm_dist_put(s, 0x180, 1, s->num_irq, translate_clear);
+    kvm_dist_put(s, 0x100, 1, s->num_irq, translate_enabled);
+
+    /* s->irq_target[irq] -> GICD_ITARGETSRn
+     * (restore targets before pending to ensure the pending state is set on
+     * the appropriate CPU interfaces in the kernel) */
+    kvm_dist_put(s, 0x800, 8, s->num_irq, translate_targets);
+
+    /* irq_state[n].pending + irq_state[n].level -> GICD_ISPENDRn */
+    kvm_dist_put(s, 0x280, 1, s->num_irq, translate_clear);
+    kvm_dist_put(s, 0x200, 1, s->num_irq, translate_pending);
+
+    /* irq_state[n].active -> GICD_ISACTIVERn */
+    kvm_dist_put(s, 0x380, 1, s->num_irq, translate_clear);
+    kvm_dist_put(s, 0x300, 1, s->num_irq, translate_active);
+
+    /* irq_state[n].trigger -> GICD_ICFRn */
+    kvm_dist_put(s, 0xc00, 2, s->num_irq, translate_trigger);
+
+    /* s->priorityX[irq] -> ICD_IPRIORITYRn */
+    kvm_dist_put(s, 0x400, 8, s->num_irq, translate_priority);
+
+    /* s->sgi_source -> ICD_CPENDSGIRn */
+    kvm_dist_put(s, 0xf10, 8, GIC_NR_SGIS, translate_clear);
+    kvm_dist_put(s, 0xf20, 8, GIC_NR_SGIS, translate_sgisource);
+
+
+    /*****************************************************************
+     * CPU Interface(s) State
+     */
+
+    for (cpu = 0; cpu < s->num_cpu; cpu++) {
+        /* s->cpu_enabled[cpu] -> GICC_CTLR */
+        reg = s->cpu_enabled[cpu];
+        kvm_gicc_access(s, 0x00, cpu, &reg, true);
+
+        /* s->priority_mask[cpu] -> GICC_PMR */
+        reg = (s->priority_mask[cpu] & 0xff);
+        kvm_gicc_access(s, 0x04, cpu, &reg, true);
+
+        /* s->bpr[cpu] -> GICC_BPR */
+        reg = (s->bpr[cpu] & 0x7);
+        kvm_gicc_access(s, 0x08, cpu, &reg, true);
+
+        /* s->abpr[cpu] -> GICC_ABPR */
+        reg = (s->abpr[cpu] & 0x7);
+        kvm_gicc_access(s, 0x1c, cpu, &reg, true);
+
+        /* s->apr[n][cpu] -> GICC_APRn */
+        for (i = 0; i < 4; i++) {
+            reg = s->apr[i][cpu];
+            kvm_gicc_access(s, 0xd0 + i * 4, cpu, &reg, true);
+        }
+    }
 }
 
 static void kvm_arm_gic_get(GICState *s)
 {
-    /* TODO: there isn't currently a kernel interface to get the GIC state */
+    uint32_t reg;
+    int i;
+    int cpu;
+
+    if (!kvm_arm_gic_can_save_restore(s)) {
+            DPRINTF("Cannot get kernel gic state, no kernel interface");
+            return;
+    }
+
+    /*****************************************************************
+     * Distributor State
+     */
+
+    /* GICD_CTLR -> s->enabled */
+    kvm_gicd_access(s, 0x0, 0, &reg, false);
+    s->enabled = reg & 1;
+
+    /* Sanity checking on GICD_TYPER -> s->num_irq, s->num_cpu */
+    kvm_gicd_access(s, 0x4, 0, &reg, false);
+    s->num_irq = ((reg & 0x1f) + 1) * 32;
+    s->num_cpu = ((reg & 0xe0) >> 5) + 1;
+
+    if (s->num_irq > GIC_MAXIRQ) {
+            fprintf(stderr, "Too many IRQs reported from the kernel: %d\n",
+                    s->num_irq);
+            abort();
+    }
+
+    /* GICD_IIDR -> ? */
+    kvm_gicd_access(s, 0x8, 0, &reg, false);
+
+    /* Verify no GROUP 1 interrupts configured in the kernel */
+    for_each_irq_reg(i, s->num_irq, 1) {
+        kvm_gicd_access(s, 0x80 + (i * 4), 0, &reg, false);
+        if (reg != 0) {
+            fprintf(stderr, "Unsupported GICD_IGROUPRn value: %08x\n",
+                    reg);
+            abort();
+        }
+    }
+
+    /* Clear all the IRQ settings */
+    for (i = 0; i < s->num_irq; i++) {
+        memset(&s->irq_state[i], 0, sizeof(s->irq_state[0]));
+    }
+
+    /* GICD_ISENABLERn -> irq_state[n].enabled */
+    kvm_dist_get(s, 0x100, 1, s->num_irq, translate_enabled);
+
+    /* GICD_ISPENDRn -> irq_state[n].pending + irq_state[n].level */
+    kvm_dist_get(s, 0x200, 1, s->num_irq, translate_pending);
+
+    /* GICD_ISACTIVERn -> irq_state[n].active */
+    kvm_dist_get(s, 0x300, 1, s->num_irq, translate_active);
+
+    /* GICD_ICFRn -> irq_state[n].trigger */
+    kvm_dist_get(s, 0xc00, 2, s->num_irq, translate_trigger);
+
+    /* GICD_IPRIORITYRn -> s->priorityX[irq] */
+    kvm_dist_get(s, 0x400, 8, s->num_irq, translate_priority);
+
+    /* GICD_ITARGETSRn -> s->irq_target[irq] */
+    kvm_dist_get(s, 0x800, 8, s->num_irq, translate_targets);
+
+    /* GICD_CPENDSGIRn -> s->sgi_source */
+    kvm_dist_get(s, 0xf10, 8, GIC_NR_SGIS, translate_sgisource);
+
+
+    /*****************************************************************
+     * CPU Interface(s) State
+     */
+
+    for (cpu = 0; cpu < s->num_cpu; cpu++) {
+        /* GICC_CTLR -> s->cpu_enabled[cpu] */
+        kvm_gicc_access(s, 0x00, cpu, &reg, false);
+        s->cpu_enabled[cpu] = (reg & 1);
+
+        /* GICC_PMR -> s->priority_mask[cpu] */
+        kvm_gicc_access(s, 0x04, cpu, &reg, false);
+        s->priority_mask[cpu] = (reg & 0xff);
+
+        /* GICC_BPR -> s->bpr[cpu] */
+        kvm_gicc_access(s, 0x08, cpu, &reg, false);
+        s->bpr[cpu] = (reg & 0x7);
+
+        /* GICC_ABPR -> s->abpr[cpu] */
+        kvm_gicc_access(s, 0x1c, cpu, &reg, false);
+        s->abpr[cpu] = (reg & 0x7);
+
+        /* GICC_APRn -> s->apr[n][cpu] */
+        for (i = 0; i < 4; i++) {
+            kvm_gicc_access(s, 0xd0 + i * 4, cpu, &reg, false);
+            s->apr[i][cpu] = reg;
+        }
+    }
 }
 
 static void kvm_arm_gic_reset(DeviceState *dev)
diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h
index 758b85a..f9133b9 100644
--- a/hw/intc/gic_internal.h
+++ b/hw/intc/gic_internal.h
@@ -99,6 +99,14 @@  typedef struct GICState {
     uint8_t  bpr[NCPU];
     uint8_t  abpr[NCPU];
 
+    /* The APR is implementation defined, so we choose a layout identical to
+     * the KVM ABI layout for QEMU's implementation of the gic:
+     * If one or more interrupts for preemption level X are active, then
+     *   APRn[X mod 32] == 0b1,  where n = X / 32
+     * otherwise the bit is clear.
+     */
+    uint32_t apr[4][NCPU];
+
     uint32_t num_cpu;
 
     MemoryRegion iomem; /* Distributor */