diff mbox series

[V3,2/2] arm_gicv3_kvm: kvm_dist_get/put: skip the registers banked by GICR

Message ID 1527047633-12368-2-git-send-email-zhaoshenglong@huawei.com
State New
Headers show
Series [V3,1/2] arm_gicv3_kvm: increase clroffset accordingly | expand

Commit Message

Shannon Zhao May 23, 2018, 3:53 a.m. UTC
While we skip the GIC_INTERNAL irqs, we don't change the register offset
accordingly. This will overlap the GICR registers value and leave the
last GIC_INTERNAL irq's registers out of update.

Fix this by skipping the registers banked by GICR.

Also for migration compatibility if the migration source (old version
qemu) doesn't send gicd_no_shift_bug = 1 to destination, then we shift
the data of PPI to get the right data for SPI.

Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920
Cc: qemu-stable@nongnu.org
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
---
Changes in V3: add migration compatibility and fix code style
---
 hw/intc/arm_gicv3_common.c         | 36 ++++++++++++++++++++++++
 hw/intc/arm_gicv3_kvm.c            | 56 +++++++++++++++++++++++++++++++++++++-
 include/hw/intc/arm_gicv3_common.h |  1 +
 3 files changed, 92 insertions(+), 1 deletion(-)

Comments

Eric Auger May 24, 2018, 9:04 a.m. UTC | #1
Hi Shannon,

On 05/23/2018 05:53 AM, Shannon Zhao wrote:
> While we skip the GIC_INTERNAL irqs, we don't change the register offset
> accordingly. This will overlap the GICR registers value and leave the
> last GIC_INTERNAL irq's registers out of update.
> 
> Fix this by skipping the registers banked by GICR.
> 
> Also for migration compatibility if the migration source (old version
> qemu) doesn't send gicd_no_shift_bug = 1 to destination, then we shift
> the data of PPI to get the right data for SPI.
> 
> Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920
> Cc: qemu-stable@nongnu.org
> Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
> ---
> Changes in V3: add migration compatibility and fix code style
> ---
>  hw/intc/arm_gicv3_common.c         | 36 ++++++++++++++++++++++++
>  hw/intc/arm_gicv3_kvm.c            | 56 +++++++++++++++++++++++++++++++++++++-
>  include/hw/intc/arm_gicv3_common.h |  1 +
>  3 files changed, 92 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
> index 7b54d52..f93e5d2 100644
> --- a/hw/intc/arm_gicv3_common.c
> +++ b/hw/intc/arm_gicv3_common.c
> @@ -141,6 +141,38 @@ static const VMStateDescription vmstate_gicv3_cpu = {
>      }
>  };
>  
> +static int gicv3_gicd_no_shift_bug_pre_load(void *opaque)
> +{
> +    GICv3State *cs = opaque;
> +
> +   /*
> +    * If the gicd_no_shift_bug subsection is not transferred this
> +    * means gicd_no_shift_bug is 0x0 (which might not be the same as
> +    * our reset value).
> +    */
> +    cs->gicd_no_shift_bug = 0x0;
> +    return 0;
> +}
> +
> +static bool gicv3_gicd_no_shift_bug_needed(void *opaque)
> +{
> +    GICv3State *cs = opaque;
> +
> +    return cs->gicd_no_shift_bug;
> +}
> +
> +const VMStateDescription vmstate_gicv3_gicd_no_shift_bug = {
> +    .name = "arm_gicv3/gicd_no_shift_bug",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .pre_load = gicv3_gicd_no_shift_bug_pre_load,
> +    .needed = gicv3_gicd_no_shift_bug_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_BOOL(gicd_no_shift_bug, GICv3State),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static const VMStateDescription vmstate_gicv3 = {
>      .name = "arm_gicv3",
>      .version_id = 1,
> @@ -165,6 +197,10 @@ static const VMStateDescription vmstate_gicv3 = {
>          VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, GICv3State, num_cpu,
>                                               vmstate_gicv3_cpu, GICv3CPUState),
>          VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * []) {
> +        &vmstate_gicv3_gicd_no_shift_bug,
> +        NULL
>      }
>  };
>  
> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
> index 3536795..bd961f1 100644
> --- a/hw/intc/arm_gicv3_kvm.c
> +++ b/hw/intc/arm_gicv3_kvm.c
> @@ -136,6 +136,12 @@ static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>      int irq;
>  
>      field = (uint32_t *)bmp;
> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
> +     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
> +     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
> +     * sync them.
> +     */
> +    offset += (8 * sizeof(uint32_t));
Now I am unclear about the semantics of the s->gicd_ipriority & friends.
With that change, is it supposed to contain only the states of SPIs or
contain the RAZ states of PPI/SGIs + states of SPIs. The array is
dimensionned to contain states for PPI/SGI+SPIs, right? In other words,
shouldn't we also shift field?

Thanks

Eric
>      for_each_dist_irq_reg(irq, s->num_irq, 8) {
>          kvm_gicd_access(s, offset, &reg, false);
>          *field = reg;
> @@ -149,7 +155,18 @@ static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>      uint32_t reg, *field;
>      int irq;
>  
> -    field = (uint32_t *)bmp;
> +    if (!s->gicd_no_shift_bug) {
> +        field = (uint32_t *)(bmp + 8 * sizeof(uint32_t));
> +    } else {
> +        field = (uint32_t *)bmp;
> +    }
> +
> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
> +     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
> +     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
> +     * sync them.
> +     */
> +    offset += (8 * sizeof(uint32_t));
in the old to new migration scenario, IIUC,
bmp contains state of 32 internal PPI/SGI(RAZ) + s->num_irq-32 SPIs
Here aren't you "putting" the RAZ state of the 32 PPI/SGIs into the
first 32 SPIs here?

Thanks

Eric
>      for_each_dist_irq_reg(irq, s->num_irq, 8) {
>          reg = *field;
>          kvm_gicd_access(s, offset, &reg, true);
> @@ -164,6 +181,12 @@ static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset,
>      uint32_t reg;
>      int irq;
>  
> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 2
> +     * GICD_ICFGR<n> registers are always RAZ/WI. The corresponding
> +     * functionality is replaced by GICR_ICFGR<n>. So it doesn't need to sync
> +     * them.
> +     */
> +    offset += (2 * sizeof(uint32_t));
>      for_each_dist_irq_reg(irq, s->num_irq, 2) {
>          kvm_gicd_access(s, offset, &reg, false);
>          reg = half_unshuffle32(reg >> 1);
> @@ -181,6 +204,16 @@ static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset,
>      uint32_t reg;
>      int irq;
>  
> +    if (!s->gicd_no_shift_bug) {
> +        bmp += (2 * sizeof(uint32_t));
> +    }
> +
> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 2
> +     * GICD_ICFGR<n> registers are always RAZ/WI. The corresponding
> +     * functionality is replaced by GICR_ICFGR<n>. So it doesn't need to sync
> +     * them.
> +     */
> +    offset += (2 * sizeof(uint32_t));
>      for_each_dist_irq_reg(irq, s->num_irq, 2) {
>          reg = *gic_bmp_ptr32(bmp, irq);
>          if (irq % 32 != 0) {
> @@ -222,6 +255,12 @@ static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp)
>      uint32_t reg;
>      int irq;
>  
> +    /* For the KVM GICv3, affinity routing is always enabled, and the
> +     * GICD_IGROUPR0/GICD_ISENABLER0/GICD_ISPENDR0/GICD_ISACTIVER0 registers
> +     * are always RAZ/WI. The corresponding functionality is replaced by the
> +     * GICR registers. So it doesn't need to sync them.
> +     */
> +    offset += (1 * sizeof(uint32_t));
>      for_each_dist_irq_reg(irq, s->num_irq, 1) {
>          kvm_gicd_access(s, offset, &reg, false);
>          *gic_bmp_ptr32(bmp, irq) = reg;
> @@ -235,6 +274,20 @@ static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
>      uint32_t reg;
>      int irq;
>  
> +    if (!s->gicd_no_shift_bug) {
> +        bmp += (1 * sizeof(uint32_t));
> +    }
> +
> +    /* For the KVM GICv3, affinity routing is always enabled, and the
> +     * GICD_IGROUPR0/GICD_ISENABLER0/GICD_ISPENDR0/GICD_ISACTIVER0 registers
> +     * are always RAZ/WI. The corresponding functionality is replaced by the
> +     * GICR registers. So it doesn't need to sync them.
> +     */
> +    offset += (1 * sizeof(uint32_t));
> +    if (clroffset != 0) {
> +        clroffset += (1 * sizeof(uint32_t));
> +    }
> +
>      for_each_dist_irq_reg(irq, s->num_irq, 1) {
>          /* If this bitmap is a set/clear register pair, first write to the
>           * clear-reg to clear all bits before using the set-reg to write
> @@ -651,6 +704,7 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
>          return;
>      }
>  
> +    s->gicd_no_shift_bug = 1;
>      kvm_arm_gicv3_put(s);
>  }
>  
> diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
> index bccdfe1..13c28c0 100644
> --- a/include/hw/intc/arm_gicv3_common.h
> +++ b/include/hw/intc/arm_gicv3_common.h
> @@ -217,6 +217,7 @@ struct GICv3State {
>      uint32_t revision;
>      bool security_extn;
>      bool irq_reset_nonsecure;
> +    bool gicd_no_shift_bug;
>  
>      int dev_fd; /* kvm device fd if backed by kvm vgic support */
>      Error *migration_blocker;
>
Shannon Zhao May 24, 2018, 9:20 a.m. UTC | #2
On 2018/5/24 17:04, Auger Eric wrote:
> Hi Shannon,
> 
> On 05/23/2018 05:53 AM, Shannon Zhao wrote:
>> While we skip the GIC_INTERNAL irqs, we don't change the register offset
>> accordingly. This will overlap the GICR registers value and leave the
>> last GIC_INTERNAL irq's registers out of update.
>>
>> Fix this by skipping the registers banked by GICR.
>>
>> Also for migration compatibility if the migration source (old version
>> qemu) doesn't send gicd_no_shift_bug = 1 to destination, then we shift
>> the data of PPI to get the right data for SPI.
>>
>> Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920
>> Cc: qemu-stable@nongnu.org
>> Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
>> ---
>> Changes in V3: add migration compatibility and fix code style
>> ---
>>  hw/intc/arm_gicv3_common.c         | 36 ++++++++++++++++++++++++
>>  hw/intc/arm_gicv3_kvm.c            | 56 +++++++++++++++++++++++++++++++++++++-
>>  include/hw/intc/arm_gicv3_common.h |  1 +
>>  3 files changed, 92 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
>> index 7b54d52..f93e5d2 100644
>> --- a/hw/intc/arm_gicv3_common.c
>> +++ b/hw/intc/arm_gicv3_common.c
>> @@ -141,6 +141,38 @@ static const VMStateDescription vmstate_gicv3_cpu = {
>>      }
>>  };
>>  
>> +static int gicv3_gicd_no_shift_bug_pre_load(void *opaque)
>> +{
>> +    GICv3State *cs = opaque;
>> +
>> +   /*
>> +    * If the gicd_no_shift_bug subsection is not transferred this
>> +    * means gicd_no_shift_bug is 0x0 (which might not be the same as
>> +    * our reset value).
>> +    */
>> +    cs->gicd_no_shift_bug = 0x0;
>> +    return 0;
>> +}
>> +
>> +static bool gicv3_gicd_no_shift_bug_needed(void *opaque)
>> +{
>> +    GICv3State *cs = opaque;
>> +
>> +    return cs->gicd_no_shift_bug;
>> +}
>> +
>> +const VMStateDescription vmstate_gicv3_gicd_no_shift_bug = {
>> +    .name = "arm_gicv3/gicd_no_shift_bug",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .pre_load = gicv3_gicd_no_shift_bug_pre_load,
>> +    .needed = gicv3_gicd_no_shift_bug_needed,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_BOOL(gicd_no_shift_bug, GICv3State),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +
>>  static const VMStateDescription vmstate_gicv3 = {
>>      .name = "arm_gicv3",
>>      .version_id = 1,
>> @@ -165,6 +197,10 @@ static const VMStateDescription vmstate_gicv3 = {
>>          VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, GICv3State, num_cpu,
>>                                               vmstate_gicv3_cpu, GICv3CPUState),
>>          VMSTATE_END_OF_LIST()
>> +    },
>> +    .subsections = (const VMStateDescription * []) {
>> +        &vmstate_gicv3_gicd_no_shift_bug,
>> +        NULL
>>      }
>>  };
>>  
>> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
>> index 3536795..bd961f1 100644
>> --- a/hw/intc/arm_gicv3_kvm.c
>> +++ b/hw/intc/arm_gicv3_kvm.c
>> @@ -136,6 +136,12 @@ static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>>      int irq;
>>  
>>      field = (uint32_t *)bmp;
>> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
>> +     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
>> +     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
>> +     * sync them.
>> +     */
>> +    offset += (8 * sizeof(uint32_t));
> Now I am unclear about the semantics of the s->gicd_ipriority & friends.
> With that change, is it supposed to contain only the states of SPIs or
> contain the RAZ states of PPI/SGIs + states of SPIs. The array is
> dimensionned to contain states for PPI/SGI+SPIs, right? In other words,
> shouldn't we also shift field?
> 
Yes, it should also shift the field. But not sure if this will affect
the new to old migration scenario. Let me think about this.

> Thanks
> 
> Eric
>>      for_each_dist_irq_reg(irq, s->num_irq, 8) {
>>          kvm_gicd_access(s, offset, &reg, false);
>>          *field = reg;
>> @@ -149,7 +155,18 @@ static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>>      uint32_t reg, *field;
>>      int irq;
>>  
>> -    field = (uint32_t *)bmp;
>> +    if (!s->gicd_no_shift_bug) {
>> +        field = (uint32_t *)(bmp + 8 * sizeof(uint32_t));
>> +    } else {
>> +        field = (uint32_t *)bmp;
>> +    }
>> +
>> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
>> +     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
>> +     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
>> +     * sync them.
>> +     */
>> +    offset += (8 * sizeof(uint32_t));
> in the old to new migration scenario, IIUC,
> bmp contains state of 32 internal PPI/SGI(RAZ) + s->num_irq-32 SPIs
> Here aren't you "putting" the RAZ state of the 32 PPI/SGIs into the
> first 32 SPIs here?
> 
Please see above code, this new version patch I add a flag for migration
compat suggested as Peter. If it migrates from old qemu, the
gicd_no_shift_bug will be 0, then it will shift the bmp, while that will
drop the data of PPI/SGI and also the last 32 SPIs info will be empty.
None of current machines use last 32 SPIs, so it's fine for the moment.

Thanks,
Eric Auger May 24, 2018, 12:10 p.m. UTC | #3
Hi Shannon,

On 05/24/2018 11:20 AM, Shannon Zhao wrote:
> 
> 
> On 2018/5/24 17:04, Auger Eric wrote:
>> Hi Shannon,
>>
>> On 05/23/2018 05:53 AM, Shannon Zhao wrote:
>>> While we skip the GIC_INTERNAL irqs, we don't change the register offset
>>> accordingly. This will overlap the GICR registers value and leave the
>>> last GIC_INTERNAL irq's registers out of update.
>>>
>>> Fix this by skipping the registers banked by GICR.
>>>
>>> Also for migration compatibility if the migration source (old version
>>> qemu) doesn't send gicd_no_shift_bug = 1 to destination, then we shift
>>> the data of PPI to get the right data for SPI.
>>>
>>> Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920
>>> Cc: qemu-stable@nongnu.org
>>> Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
>>> ---
>>> Changes in V3: add migration compatibility and fix code style
>>> ---
>>>  hw/intc/arm_gicv3_common.c         | 36 ++++++++++++++++++++++++
>>>  hw/intc/arm_gicv3_kvm.c            | 56 +++++++++++++++++++++++++++++++++++++-
>>>  include/hw/intc/arm_gicv3_common.h |  1 +
>>>  3 files changed, 92 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
>>> index 7b54d52..f93e5d2 100644
>>> --- a/hw/intc/arm_gicv3_common.c
>>> +++ b/hw/intc/arm_gicv3_common.c
>>> @@ -141,6 +141,38 @@ static const VMStateDescription vmstate_gicv3_cpu = {
>>>      }
>>>  };
>>>  
>>> +static int gicv3_gicd_no_shift_bug_pre_load(void *opaque)
>>> +{
>>> +    GICv3State *cs = opaque;
>>> +
>>> +   /*
>>> +    * If the gicd_no_shift_bug subsection is not transferred this
>>> +    * means gicd_no_shift_bug is 0x0 (which might not be the same as
>>> +    * our reset value).
>>> +    */
>>> +    cs->gicd_no_shift_bug = 0x0;
>>> +    return 0;
>>> +}
>>> +
>>> +static bool gicv3_gicd_no_shift_bug_needed(void *opaque)
>>> +{
>>> +    GICv3State *cs = opaque;
>>> +
>>> +    return cs->gicd_no_shift_bug;
>>> +}
>>> +
>>> +const VMStateDescription vmstate_gicv3_gicd_no_shift_bug = {
>>> +    .name = "arm_gicv3/gicd_no_shift_bug",
>>> +    .version_id = 1,
>>> +    .minimum_version_id = 1,
>>> +    .pre_load = gicv3_gicd_no_shift_bug_pre_load,
>>> +    .needed = gicv3_gicd_no_shift_bug_needed,
>>> +    .fields = (VMStateField[]) {
>>> +        VMSTATE_BOOL(gicd_no_shift_bug, GICv3State),
>>> +        VMSTATE_END_OF_LIST()
>>> +    }
>>> +};
>>> +
>>>  static const VMStateDescription vmstate_gicv3 = {
>>>      .name = "arm_gicv3",
>>>      .version_id = 1,
>>> @@ -165,6 +197,10 @@ static const VMStateDescription vmstate_gicv3 = {
>>>          VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, GICv3State, num_cpu,
>>>                                               vmstate_gicv3_cpu, GICv3CPUState),
>>>          VMSTATE_END_OF_LIST()
>>> +    },
>>> +    .subsections = (const VMStateDescription * []) {
>>> +        &vmstate_gicv3_gicd_no_shift_bug,
>>> +        NULL
>>>      }
>>>  };
>>>  
>>> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
>>> index 3536795..bd961f1 100644
>>> --- a/hw/intc/arm_gicv3_kvm.c
>>> +++ b/hw/intc/arm_gicv3_kvm.c
>>> @@ -136,6 +136,12 @@ static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>>>      int irq;
>>>  
>>>      field = (uint32_t *)bmp;
>>> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
>>> +     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
>>> +     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
>>> +     * sync them.
>>> +     */
>>> +    offset += (8 * sizeof(uint32_t));
>> Now I am unclear about the semantics of the s->gicd_ipriority & friends.
>> With that change, is it supposed to contain only the states of SPIs or
>> contain the RAZ states of PPI/SGIs + states of SPIs. The array is
>> dimensionned to contain states for PPI/SGI+SPIs, right? In other words,
>> shouldn't we also shift field?
>>
> Yes, it should also shift the field. But not sure if this will affect
> the new to old migration scenario. Let me think about this.

Assuming we shift "field" and leave the space meant for PPI/SGI then my
understanding is we wouldn't need the subsection. We would just add the
last 32 SPI states in the array/bitmap.

Thanks

Eric
> 
>> Thanks
>>
>> Eric
>>>      for_each_dist_irq_reg(irq, s->num_irq, 8) {
>>>          kvm_gicd_access(s, offset, &reg, false);
>>>          *field = reg;
>>> @@ -149,7 +155,18 @@ static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>>>      uint32_t reg, *field;
>>>      int irq;
>>>  
>>> -    field = (uint32_t *)bmp;
>>> +    if (!s->gicd_no_shift_bug) {
>>> +        field = (uint32_t *)(bmp + 8 * sizeof(uint32_t));
>>> +    } else {
>>> +        field = (uint32_t *)bmp;
>>> +    }
>>> +
>>> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
>>> +     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
>>> +     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
>>> +     * sync them.
>>> +     */
>>> +    offset += (8 * sizeof(uint32_t));
>> in the old to new migration scenario, IIUC,
>> bmp contains state of 32 internal PPI/SGI(RAZ) + s->num_irq-32 SPIs
>> Here aren't you "putting" the RAZ state of the 32 PPI/SGIs into the
>> first 32 SPIs here?
>>
> Please see above code, this new version patch I add a flag for migration
> compat suggested as Peter. If it migrates from old qemu, the
> gicd_no_shift_bug will be 0, then it will shift the bmp, while that will
> drop the data of PPI/SGI and also the last 32 SPIs info will be empty.
> None of current machines use last 32 SPIs, so it's fine for the moment.
> 
> Thanks,
>
Peter Maydell May 24, 2018, 1:11 p.m. UTC | #4
On 23 May 2018 at 04:53, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
> While we skip the GIC_INTERNAL irqs, we don't change the register offset
> accordingly. This will overlap the GICR registers value and leave the
> last GIC_INTERNAL irq's registers out of update.
>
> Fix this by skipping the registers banked by GICR.
>
> Also for migration compatibility if the migration source (old version
> qemu) doesn't send gicd_no_shift_bug = 1 to destination, then we shift
> the data of PPI to get the right data for SPI.
>
> Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920
> Cc: qemu-stable@nongnu.org
> Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
> ---
> Changes in V3: add migration compatibility and fix code style
> ---
>  hw/intc/arm_gicv3_common.c         | 36 ++++++++++++++++++++++++
>  hw/intc/arm_gicv3_kvm.c            | 56 +++++++++++++++++++++++++++++++++++++-
>  include/hw/intc/arm_gicv3_common.h |  1 +
>  3 files changed, 92 insertions(+), 1 deletion(-)
>
> diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
> index 7b54d52..f93e5d2 100644
> --- a/hw/intc/arm_gicv3_common.c
> +++ b/hw/intc/arm_gicv3_common.c
> @@ -141,6 +141,38 @@ static const VMStateDescription vmstate_gicv3_cpu = {
>      }
>  };
>
> +static int gicv3_gicd_no_shift_bug_pre_load(void *opaque)
> +{
> +    GICv3State *cs = opaque;
> +
> +   /*
> +    * If the gicd_no_shift_bug subsection is not transferred this
> +    * means gicd_no_shift_bug is 0x0 (which might not be the same as
> +    * our reset value).
> +    */

This comment seems to have been copied from a similar one about
SRE_EL1, and I think it's a bit misleading here. For icc_sre_el1,
that is a guest-visible struct value which we set to something in the
device's reset function. This gicd_no_shift_bug field is
only for the benefit of migration.

This comment is the ideal place to explain the semantics of gicd_no_shift_bug
and why we have to use it.

You should also only set this if KVM is enabled, because the TCG
GIC gets the semantics of the data structure right.

> +    cs->gicd_no_shift_bug = 0x0;
> +    return 0;
> +}
> +
> +static bool gicv3_gicd_no_shift_bug_needed(void *opaque)
> +{
> +    GICv3State *cs = opaque;
> +
> +    return cs->gicd_no_shift_bug;
> +}
> +
> +const VMStateDescription vmstate_gicv3_gicd_no_shift_bug = {
> +    .name = "arm_gicv3/gicd_no_shift_bug",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .pre_load = gicv3_gicd_no_shift_bug_pre_load,
> +    .needed = gicv3_gicd_no_shift_bug_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_BOOL(gicd_no_shift_bug, GICv3State),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};

You also need a post-load function, because that is where you want
to fix up the incoming data (by memcpy'ing it into the right place).

> +
>  static const VMStateDescription vmstate_gicv3 = {
>      .name = "arm_gicv3",
>      .version_id = 1,
> @@ -165,6 +197,10 @@ static const VMStateDescription vmstate_gicv3 = {
>          VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, GICv3State, num_cpu,
>                                               vmstate_gicv3_cpu, GICv3CPUState),
>          VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * []) {
> +        &vmstate_gicv3_gicd_no_shift_bug,
> +        NULL
>      }
>  };
>
> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
> index 3536795..bd961f1 100644
> --- a/hw/intc/arm_gicv3_kvm.c
> +++ b/hw/intc/arm_gicv3_kvm.c
> @@ -136,6 +136,12 @@ static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>      int irq;
>
>      field = (uint32_t *)bmp;
> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
> +     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
> +     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
> +     * sync them.
> +     */

This is true, but not why we need to add to the offset. We need
to add to the offset because for_each_dist_irq_reg()'s loop
handles irq numbers starting from GIC_INTERNAL, but the offset
we have is for the start of the GICD_IPRIORITYR<n> register range,
which includes space for the irqs 0..GIC_INTERNAL-1.

> +    offset += (8 * sizeof(uint32_t));

Possibly these offset changes would be clearer written as

     offset += (GIC_INTERNAL * bits-per-irq) / 8;

where bits-per-irq is the same as the last argument to for_each_dist_irq_reg()?

>      for_each_dist_irq_reg(irq, s->num_irq, 8) {
>          kvm_gicd_access(s, offset, &reg, false);
>          *field = reg;
> @@ -149,7 +155,18 @@ static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>      uint32_t reg, *field;
>      int irq;
>
> -    field = (uint32_t *)bmp;
> +    if (!s->gicd_no_shift_bug) {
> +        field = (uint32_t *)(bmp + 8 * sizeof(uint32_t));
> +    } else {
> +        field = (uint32_t *)bmp;
> +    }

As noted above, don't try to fix the migration compat problem up here.
The code for syncing registers should just assume the data structure
is being used correctly.

> @@ -235,6 +274,20 @@ static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
>      uint32_t reg;
>      int irq;
>
> +    if (!s->gicd_no_shift_bug) {
> +        bmp += (1 * sizeof(uint32_t));
> +    }

As noted above, don't try to fix the migration compat problem up here.
The code for syncing registers should just assume the data structure
is being used correctly.

> +
> +    /* For the KVM GICv3, affinity routing is always enabled, and the
> +     * GICD_IGROUPR0/GICD_ISENABLER0/GICD_ISPENDR0/GICD_ISACTIVER0 registers
> +     * are always RAZ/WI. The corresponding functionality is replaced by the
> +     * GICR registers. So it doesn't need to sync them.
> +     */
> +    offset += (1 * sizeof(uint32_t));
> +    if (clroffset != 0) {
> +        clroffset += (1 * sizeof(uint32_t));
> +    }
> +
>      for_each_dist_irq_reg(irq, s->num_irq, 1) {
>          /* If this bitmap is a set/clear register pair, first write to the
>           * clear-reg to clear all bits before using the set-reg to write
> @@ -651,6 +704,7 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
>          return;
>      }
>
> +    s->gicd_no_shift_bug = 1;

This is the wrong place to do this reset, because then it won't
be in effect for the TCG GIC.

>      kvm_arm_gicv3_put(s);
>  }
>
> diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
> index bccdfe1..13c28c0 100644
> --- a/include/hw/intc/arm_gicv3_common.h
> +++ b/include/hw/intc/arm_gicv3_common.h
> @@ -217,6 +217,7 @@ struct GICv3State {
>      uint32_t revision;
>      bool security_extn;
>      bool irq_reset_nonsecure;
> +    bool gicd_no_shift_bug;

Probably putting 'migration' in the field name will help to indicate
that it's only a migration compat fixup.

>
>      int dev_fd; /* kvm device fd if backed by kvm vgic support */
>      Error *migration_blocker;
> --
> 2.0.4

thanks
-- PMM
Peter Maydell May 24, 2018, 1:14 p.m. UTC | #5
On 24 May 2018 at 10:04, Auger Eric <eric.auger@redhat.com> wrote:
> Now I am unclear about the semantics of the s->gicd_ipriority & friends.
> With that change, is it supposed to contain only the states of SPIs or
> contain the RAZ states of PPI/SGIs + states of SPIs. The array is
> dimensionned to contain states for PPI/SGI+SPIs, right? In other words,
> shouldn't we also shift field?

The semantics of the gicd_ipriority and other data structures are
set by the TCG GIC implementation, and include blank space at
the start where the PPI/SGI bits would live. See this comment
from arm_gicv3_common.h:

 * Each bitmap contains a bit for each interrupt. Although there is
 * space for the PPIs and SGIs, those bits (the first 32) are never
 * used as that state lives in the redistributor. The unused bits are
 * provided purely so that interrupt X's state is always in bit X; this
 * avoids bugs where we forget to subtract GIC_INTERNAL from an
 * interrupt number.

thanks
-- PMM
Eric Auger May 24, 2018, 1:59 p.m. UTC | #6
Hi,

On 05/24/2018 03:14 PM, Peter Maydell wrote:
> On 24 May 2018 at 10:04, Auger Eric <eric.auger@redhat.com> wrote:
>> Now I am unclear about the semantics of the s->gicd_ipriority & friends.
>> With that change, is it supposed to contain only the states of SPIs or
>> contain the RAZ states of PPI/SGIs + states of SPIs. The array is
>> dimensionned to contain states for PPI/SGI+SPIs, right? In other words,
>> shouldn't we also shift field?
> 
> The semantics of the gicd_ipriority and other data structures are
> set by the TCG GIC implementation, and include blank space at
> the start where the PPI/SGI bits would live. See this comment
> from arm_gicv3_common.h:
> 
>  * Each bitmap contains a bit for each interrupt. Although there is
>  * space for the PPIs and SGIs, those bits (the first 32) are never
>  * used as that state lives in the redistributor. The unused bits are
>  * provided purely so that interrupt X's state is always in bit X; this
>  * avoids bugs where we forget to subtract GIC_INTERNAL from an
>  * interrupt number.

If I understand Shannon's code correctly, the space for PPIs/SGIs is
currently overwritten by SPI state, hence my comment. If we stick to the
current semantics, can't we just add the last missing 32 SPI states and
we don't need the subsection?

Thanks

Eric
> 
> thanks
> -- PMM
>
Peter Maydell May 24, 2018, 2:16 p.m. UTC | #7
On 24 May 2018 at 14:59, Auger Eric <eric.auger@redhat.com> wrote:
> Hi,
>
> On 05/24/2018 03:14 PM, Peter Maydell wrote:
>> On 24 May 2018 at 10:04, Auger Eric <eric.auger@redhat.com> wrote:
>>> Now I am unclear about the semantics of the s->gicd_ipriority & friends.
>>> With that change, is it supposed to contain only the states of SPIs or
>>> contain the RAZ states of PPI/SGIs + states of SPIs. The array is
>>> dimensionned to contain states for PPI/SGI+SPIs, right? In other words,
>>> shouldn't we also shift field?
>>
>> The semantics of the gicd_ipriority and other data structures are
>> set by the TCG GIC implementation, and include blank space at
>> the start where the PPI/SGI bits would live. See this comment
>> from arm_gicv3_common.h:
>>
>>  * Each bitmap contains a bit for each interrupt. Although there is
>>  * space for the PPIs and SGIs, those bits (the first 32) are never
>>  * used as that state lives in the redistributor. The unused bits are
>>  * provided purely so that interrupt X's state is always in bit X; this
>>  * avoids bugs where we forget to subtract GIC_INTERNAL from an
>>  * interrupt number.
>
> If I understand Shannon's code correctly, the space for PPIs/SGIs is
> currently overwritten by SPI state, hence my comment.

Only for KVM, not for TCG, and it's the other way round: we
end up with two lots of PPI/SGI space in the data structure
by mistake. Let me fish out the comment I made on the v2 of this
series:

In the code in master, we have QEMU data structures
(bitmaps, etc) which have one entry for each of GICV3_MAXIRQ
irqs. That includes the RAZ/WI unused space for the SPIs/PPIs, so
for a 1-bit-per-irq bitmap:
 [0x00000000, irq 32, irq 33, .... ]

When we fill in the values from KVM into these data structures,
we start after the unused space, because the for_each_dist_irq_reg()
macro starts with _irq = GIC_INTERNAL. But we forgot to adjust
the offset value we use for the KVM access, so we start by
reading the RAZ/WI values from KVM, and the data structure
contents end up with:
 [0x00000000, 0x00000000, irq 32, irq 33, ... ]
(and the last irqs wouldn't get transferred).

With this change to the code we will get the offset right and
the data structure will be filled as
 [0x00000000, irq 32, irq 33, .... ]
For TCG, where we never had this bug, this is how the data
structure has always looked.

But for migration from the old version, the data structure
we receive from the migration source will contain the old
broken layout of
 [0x00000000, 0x00000000, irq 32, irq 33, ... ]

So we need in inbound migration to identify when we need
to fix this up (by copying the data down to get rid of that
extra 0x00000000), which is "when KVM is enabled and the source
is not a version new enough to have fixed this bug".

> If we stick to the
> current semantics, can't we just add the last missing 32 SPI states and
> we don't need the subsection?

You need a subsection, because that's how you get migration
compatibility.

thanks
-- PMM
Eric Auger May 24, 2018, 2:40 p.m. UTC | #8
Hi Peter,

On 05/24/2018 04:16 PM, Peter Maydell wrote:
> On 24 May 2018 at 14:59, Auger Eric <eric.auger@redhat.com> wrote:
>> Hi,
>>
>> On 05/24/2018 03:14 PM, Peter Maydell wrote:
>>> On 24 May 2018 at 10:04, Auger Eric <eric.auger@redhat.com> wrote:
>>>> Now I am unclear about the semantics of the s->gicd_ipriority & friends.
>>>> With that change, is it supposed to contain only the states of SPIs or
>>>> contain the RAZ states of PPI/SGIs + states of SPIs. The array is
>>>> dimensionned to contain states for PPI/SGI+SPIs, right? In other words,
>>>> shouldn't we also shift field?
>>>
>>> The semantics of the gicd_ipriority and other data structures are
>>> set by the TCG GIC implementation, and include blank space at
>>> the start where the PPI/SGI bits would live. See this comment
>>> from arm_gicv3_common.h:
>>>
>>>  * Each bitmap contains a bit for each interrupt. Although there is
>>>  * space for the PPIs and SGIs, those bits (the first 32) are never
>>>  * used as that state lives in the redistributor. The unused bits are
>>>  * provided purely so that interrupt X's state is always in bit X; this
>>>  * avoids bugs where we forget to subtract GIC_INTERNAL from an
>>>  * interrupt number.
>>
>> If I understand Shannon's code correctly, the space for PPIs/SGIs is
>> currently overwritten by SPI state, hence my comment.
> 
> Only for KVM, not for TCG, and it's the other way round: we
> end up with two lots of PPI/SGI space in the data structure
> by mistake. Let me fish out the comment I made on the v2 of this
> series:
> 
> In the code in master, we have QEMU data structures
> (bitmaps, etc) which have one entry for each of GICV3_MAXIRQ
> irqs. That includes the RAZ/WI unused space for the SPIs/PPIs, so
> for a 1-bit-per-irq bitmap:
>  [0x00000000, irq 32, irq 33, .... ]
> 
> When we fill in the values from KVM into these data structures,
> we start after the unused space, because the for_each_dist_irq_reg()
> macro starts with _irq = GIC_INTERNAL. But we forgot to adjust
> the offset value we use for the KVM access, so we start by
> reading the RAZ/WI values from KVM, and the data structure
> contents end up with:
>  [0x00000000, 0x00000000, irq 32, irq 33, ... ]
> (and the last irqs wouldn't get transferred).
In kvm_dist_get_priority (new code), the offset is where we read and
field is where we write, correct? Offset was shifted so we effectively
read in KVM regs the num_irq-32 SPI states now but don't we start
writing at the beginning of bmp, (ie s->gicd_ipriority), at PPI/SGI
offset? What am I missing?

I don't understand you TCG remark above, sorry.

Thanks

Eric
> 
> With this change to the code we will get the offset right and
> the data structure will be filled as
>  [0x00000000, irq 32, irq 33, .... ]
> For TCG, where we never had this bug, this is how the data
> structure has always looked.
> 
> But for migration from the old version, the data structure
> we receive from the migration source will contain the old
> broken layout of
>  [0x00000000, 0x00000000, irq 32, irq 33, ... ]
> 
> So we need in inbound migration to identify when we need
> to fix this up (by copying the data down to get rid of that
> extra 0x00000000), which is "when KVM is enabled and the source
> is not a version new enough to have fixed this bug".
> 
>> If we stick to the
>> current semantics, can't we just add the last missing 32 SPI states and
>> we don't need the subsection?
> 
> You need a subsection, because that's how you get migration
> compatibility.
> 
> thanks
> -- PMM
>
Peter Maydell May 24, 2018, 2:56 p.m. UTC | #9
On 24 May 2018 at 15:40, Auger Eric <eric.auger@redhat.com> wrote:
> Hi Peter,
>
> On 05/24/2018 04:16 PM, Peter Maydell wrote:
>> Only for KVM, not for TCG, and it's the other way round: we
>> end up with two lots of PPI/SGI space in the data structure
>> by mistake. Let me fish out the comment I made on the v2 of this
>> series:
>>
>> In the code in master, we have QEMU data structures
>> (bitmaps, etc) which have one entry for each of GICV3_MAXIRQ
>> irqs. That includes the RAZ/WI unused space for the SPIs/PPIs, so
>> for a 1-bit-per-irq bitmap:
>>  [0x00000000, irq 32, irq 33, .... ]
>>
>> When we fill in the values from KVM into these data structures,
>> we start after the unused space, because the for_each_dist_irq_reg()
>> macro starts with _irq = GIC_INTERNAL. But we forgot to adjust
>> the offset value we use for the KVM access, so we start by
>> reading the RAZ/WI values from KVM, and the data structure
>> contents end up with:
>>  [0x00000000, 0x00000000, irq 32, irq 33, ... ]
>> (and the last irqs wouldn't get transferred).
> In kvm_dist_get_priority (new code), the offset is where we read and
> field is where we write, correct? Offset was shifted so we effectively
> read in KVM regs the num_irq-32 SPI states now but don't we start
> writing at the beginning of bmp, (ie s->gicd_ipriority), at PPI/SGI
> offset? What am I missing?

Oops, yes, you're right. My explanation applies to the
various other bitmaps, where we are accessing the
fields in the data structure using gic_bmp_ptr32(bmp, irq),
but not to gicd_ipriority[], which we are directly accessing
starting with the first word, not by indexing via bmp[irq].

So we need to handle these two cases differently.
You're correct that for gicd_ipriority[], the code in
master reads and writes to that data structure as:
 [0, 0, ..., 0, irq 32, irq 33, ..., 0, 0, ... 0]
so all the values are in the right place but we:
 (a) unnecessarily read/write zeroes for the PPI/SGI fields
 (b) fail to transfer the last 32 interrupts

We can fix the gicd_ipriority[] case simply by adding
   bmp = GIC_INTERNAL;
before the assignment to 'field' in both kvm_dist_get_priority()
and kvm_dist_put_priority(). This doesn't affect migration
compatibility. We should do this separately from fixing the
other bitmaps, because it's simpler.

> I don't understand you TCG remark above, sorry.

You can migrate a TCG VM as well as a KVM one. The
TCG GICv3 doesn't use any of this code in hw/intc/arm_gicv3_kvm.c,
so it doesn't have any of these bugs. So any fixups for the
KVM bugs so we get migration correct in the
"buggy VM" -> "not buggy VM" situation should not misidentify
a TCG VM as "buggy".

PS: I have a feeling that kvm_dist_get/set_priority have
an endianness problem -- the 'reg' we read from the kernel
will be a 32-bit value with the priority byte for the
lowest-numbered IRQ in its least significant byte, but if
the host is big-endian we'll write that to the wrong offset
in the gicd_priority[] array...

thanks
-- PMM
Peter Maydell May 24, 2018, 2:58 p.m. UTC | #10
On 24 May 2018 at 15:56, Peter Maydell <peter.maydell@linaro.org> wrote:
> Oops, yes, you're right. My explanation applies to the
> various other bitmaps, where we are accessing the
> fields in the data structure using gic_bmp_ptr32(bmp, irq),
> but not to gicd_ipriority[], which we are directly accessing
> starting with the first word, not by indexing via bmp[irq].
>
> So we need to handle these two cases differently.
> You're correct that for gicd_ipriority[], the code in
> master reads and writes to that data structure as:
>  [0, 0, ..., 0, irq 32, irq 33, ..., 0, 0, ... 0]
> so all the values are in the right place but we:
>  (a) unnecessarily read/write zeroes for the PPI/SGI fields
>  (b) fail to transfer the last 32 interrupts
>
> We can fix the gicd_ipriority[] case simply by adding
>    bmp = GIC_INTERNAL;

Oops, I meant
 bmp += GIC_INTERNAL;

thanks
-- PMM
Eric Auger May 24, 2018, 3:09 p.m. UTC | #11
Hi Peter,

On 05/24/2018 04:56 PM, Peter Maydell wrote:
> On 24 May 2018 at 15:40, Auger Eric <eric.auger@redhat.com> wrote:
>> Hi Peter,
>>
>> On 05/24/2018 04:16 PM, Peter Maydell wrote:
>>> Only for KVM, not for TCG, and it's the other way round: we
>>> end up with two lots of PPI/SGI space in the data structure
>>> by mistake. Let me fish out the comment I made on the v2 of this
>>> series:
>>>
>>> In the code in master, we have QEMU data structures
>>> (bitmaps, etc) which have one entry for each of GICV3_MAXIRQ
>>> irqs. That includes the RAZ/WI unused space for the SPIs/PPIs, so
>>> for a 1-bit-per-irq bitmap:
>>>  [0x00000000, irq 32, irq 33, .... ]
>>>
>>> When we fill in the values from KVM into these data structures,
>>> we start after the unused space, because the for_each_dist_irq_reg()
>>> macro starts with _irq = GIC_INTERNAL. But we forgot to adjust
>>> the offset value we use for the KVM access, so we start by
>>> reading the RAZ/WI values from KVM, and the data structure
>>> contents end up with:
>>>  [0x00000000, 0x00000000, irq 32, irq 33, ... ]
>>> (and the last irqs wouldn't get transferred).
>> In kvm_dist_get_priority (new code), the offset is where we read and
>> field is where we write, correct? Offset was shifted so we effectively
>> read in KVM regs the num_irq-32 SPI states now but don't we start
>> writing at the beginning of bmp, (ie s->gicd_ipriority), at PPI/SGI
>> offset? What am I missing?
> 
> Oops, yes, you're right. My explanation applies to the
> various other bitmaps, where we are accessing the
> fields in the data structure using gic_bmp_ptr32(bmp, irq),
> but not to gicd_ipriority[], which we are directly accessing
> starting with the first word, not by indexing via bmp[irq].
> 
> So we need to handle these two cases differently.
> You're correct that for gicd_ipriority[], the code in
> master reads and writes to that data structure as:
>  [0, 0, ..., 0, irq 32, irq 33, ..., 0, 0, ... 0]
> so all the values are in the right place but we:
>  (a) unnecessarily read/write zeroes for the PPI/SGI fields
>  (b) fail to transfer the last 32 interrupts
> 
> We can fix the gicd_ipriority[] case simply by adding
>    bmp = GIC_INTERNAL;
> before the assignment to 'field' in both kvm_dist_get_priority()
> and kvm_dist_put_priority(). This doesn't affect migration
> compatibility. We should do this separately from fixing the
> other bitmaps, because it's simpler.

Agreed for the other bitmaps. I actually stumbled on
kvm_dist_get_priority and got stuck there ;-) So indeed the subsection
is needed.
> 
>> I don't understand you TCG remark above, sorry.
> 
> You can migrate a TCG VM as well as a KVM one. The
> TCG GICv3 doesn't use any of this code in hw/intc/arm_gicv3_kvm.c,
> so it doesn't have any of these bugs. So any fixups for the
> KVM bugs so we get migration correct in the
> "buggy VM" -> "not buggy VM" situation should not misidentify
> a TCG VM as "buggy".

Ah OK. Thank you for the clarification

Eric
> 
> PS: I have a feeling that kvm_dist_get/set_priority have
> an endianness problem -- the 'reg' we read from the kernel
> will be a 32-bit value with the priority byte for the
> lowest-numbered IRQ in its least significant byte, but if
> the host is big-endian we'll write that to the wrong offset
> in the gicd_priority[] array...
> 
> thanks
> -- PMM
>
Shannon Zhao May 25, 2018, 8:42 a.m. UTC | #12
On 2018/5/24 22:56, Peter Maydell wrote:
> On 24 May 2018 at 15:40, Auger Eric <eric.auger@redhat.com> wrote:
>> > Hi Peter,
>> >
>> > On 05/24/2018 04:16 PM, Peter Maydell wrote:
>>> >> Only for KVM, not for TCG, and it's the other way round: we
>>> >> end up with two lots of PPI/SGI space in the data structure
>>> >> by mistake. Let me fish out the comment I made on the v2 of this
>>> >> series:
>>> >>
>>> >> In the code in master, we have QEMU data structures
>>> >> (bitmaps, etc) which have one entry for each of GICV3_MAXIRQ
>>> >> irqs. That includes the RAZ/WI unused space for the SPIs/PPIs, so
>>> >> for a 1-bit-per-irq bitmap:
>>> >>  [0x00000000, irq 32, irq 33, .... ]
>>> >>
>>> >> When we fill in the values from KVM into these data structures,
>>> >> we start after the unused space, because the for_each_dist_irq_reg()
>>> >> macro starts with _irq = GIC_INTERNAL. But we forgot to adjust
>>> >> the offset value we use for the KVM access, so we start by
>>> >> reading the RAZ/WI values from KVM, and the data structure
>>> >> contents end up with:
>>> >>  [0x00000000, 0x00000000, irq 32, irq 33, ... ]
>>> >> (and the last irqs wouldn't get transferred).
>> > In kvm_dist_get_priority (new code), the offset is where we read and
>> > field is where we write, correct? Offset was shifted so we effectively
>> > read in KVM regs the num_irq-32 SPI states now but don't we start
>> > writing at the beginning of bmp, (ie s->gicd_ipriority), at PPI/SGI
>> > offset? What am I missing?
> Oops, yes, you're right. My explanation applies to the
> various other bitmaps, where we are accessing the
> fields in the data structure using gic_bmp_ptr32(bmp, irq),
> but not to gicd_ipriority[], which we are directly accessing
> starting with the first word, not by indexing via bmp[irq].
> 
> So we need to handle these two cases differently.
> You're correct that for gicd_ipriority[], the code in
> master reads and writes to that data structure as:
>  [0, 0, ..., 0, irq 32, irq 33, ..., 0, 0, ... 0]
> so all the values are in the right place but we:
>  (a) unnecessarily read/write zeroes for the PPI/SGI fields
>  (b) fail to transfer the last 32 interrupts
> 
> We can fix the gicd_ipriority[] case simply by adding
>    bmp = GIC_INTERNAL;
> before the assignment to 'field' in both kvm_dist_get_priority()
> and kvm_dist_put_priority(). This doesn't affect migration
> compatibility. We should do this separately from fixing the
> other bitmaps, because it's simpler.
> 
If we do bmp += GIC_INTERNAL, we should also add this to offset,
otherwise we will put the SGI/PPIs data to SPIs, right?

Thanks,
Peter Maydell May 25, 2018, 9 a.m. UTC | #13
On 25 May 2018 at 09:42, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
> On 2018/5/24 22:56, Peter Maydell wrote:
>> We can fix the gicd_ipriority[] case simply by adding
>>    bmp = GIC_INTERNAL;
>> before the assignment to 'field' in both kvm_dist_get_priority()
>> and kvm_dist_put_priority(). This doesn't affect migration
>> compatibility. We should do this separately from fixing the
>> other bitmaps, because it's simpler.
>>
> If we do bmp += GIC_INTERNAL, we should also add this to offset,
> otherwise we will put the SGI/PPIs data to SPIs, right?

Yes. This code seems remarkably hard to get right, I think
because we effectively have three different things indexing
through the loop -- irq, field, offset -- and they're all
independently set to starting values in different places.

thanks
-- PMM
Shannon Zhao May 25, 2018, 9:15 a.m. UTC | #14
On 2018/5/24 21:11, Peter Maydell wrote:
> On 23 May 2018 at 04:53, Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>> While we skip the GIC_INTERNAL irqs, we don't change the register offset
>> accordingly. This will overlap the GICR registers value and leave the
>> last GIC_INTERNAL irq's registers out of update.
>>
>> Fix this by skipping the registers banked by GICR.
>>
>> Also for migration compatibility if the migration source (old version
>> qemu) doesn't send gicd_no_shift_bug = 1 to destination, then we shift
>> the data of PPI to get the right data for SPI.
>>
>> Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920
>> Cc: qemu-stable@nongnu.org
>> Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
>> ---
>> Changes in V3: add migration compatibility and fix code style
>> ---
>>  hw/intc/arm_gicv3_common.c         | 36 ++++++++++++++++++++++++
>>  hw/intc/arm_gicv3_kvm.c            | 56 +++++++++++++++++++++++++++++++++++++-
>>  include/hw/intc/arm_gicv3_common.h |  1 +
>>  3 files changed, 92 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
>> index 7b54d52..f93e5d2 100644
>> --- a/hw/intc/arm_gicv3_common.c
>> +++ b/hw/intc/arm_gicv3_common.c
>> @@ -141,6 +141,38 @@ static const VMStateDescription vmstate_gicv3_cpu = {
>>      }
>>  };
>>
>> +static int gicv3_gicd_no_shift_bug_pre_load(void *opaque)
>> +{
>> +    GICv3State *cs = opaque;
>> +
>> +   /*
>> +    * If the gicd_no_shift_bug subsection is not transferred this
>> +    * means gicd_no_shift_bug is 0x0 (which might not be the same as
>> +    * our reset value).
>> +    */
> 
> This comment seems to have been copied from a similar one about
> SRE_EL1, and I think it's a bit misleading here. For icc_sre_el1,
> that is a guest-visible struct value which we set to something in the
> device's reset function. This gicd_no_shift_bug field is
> only for the benefit of migration.
> 
> This comment is the ideal place to explain the semantics of gicd_no_shift_bug
> and why we have to use it.
> 
> You should also only set this if KVM is enabled, because the TCG
> GIC gets the semantics of the data structure right.
> 
ok

>> +    cs->gicd_no_shift_bug = 0x0;
>> +    return 0;
>> +}
>> +
>> +static bool gicv3_gicd_no_shift_bug_needed(void *opaque)
>> +{
>> +    GICv3State *cs = opaque;
>> +
>> +    return cs->gicd_no_shift_bug;
>> +}
>> +
>> +const VMStateDescription vmstate_gicv3_gicd_no_shift_bug = {
>> +    .name = "arm_gicv3/gicd_no_shift_bug",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .pre_load = gicv3_gicd_no_shift_bug_pre_load,
>> +    .needed = gicv3_gicd_no_shift_bug_needed,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_BOOL(gicd_no_shift_bug, GICv3State),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
> 
> You also need a post-load function, because that is where you want
> to fix up the incoming data (by memcpy'ing it into the right place).
> 
Ok.

>> +
>>  static const VMStateDescription vmstate_gicv3 = {
>>      .name = "arm_gicv3",
>>      .version_id = 1,
>> @@ -165,6 +197,10 @@ static const VMStateDescription vmstate_gicv3 = {
>>          VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, GICv3State, num_cpu,
>>                                               vmstate_gicv3_cpu, GICv3CPUState),
>>          VMSTATE_END_OF_LIST()
>> +    },
>> +    .subsections = (const VMStateDescription * []) {
>> +        &vmstate_gicv3_gicd_no_shift_bug,
>> +        NULL
>>      }
>>  };
>>
>> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
>> index 3536795..bd961f1 100644
>> --- a/hw/intc/arm_gicv3_kvm.c
>> +++ b/hw/intc/arm_gicv3_kvm.c
>> @@ -136,6 +136,12 @@ static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>>      int irq;
>>
>>      field = (uint32_t *)bmp;
>> +    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
>> +     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
>> +     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
>> +     * sync them.
>> +     */
> 
> This is true, but not why we need to add to the offset. We need
> to add to the offset because for_each_dist_irq_reg()'s loop
> handles irq numbers starting from GIC_INTERNAL, but the offset
> we have is for the start of the GICD_IPRIORITYR<n> register range,
> which includes space for the irqs 0..GIC_INTERNAL-1.
> 
>> +    offset += (8 * sizeof(uint32_t));
> 
> Possibly these offset changes would be clearer written as
> 
>      offset += (GIC_INTERNAL * bits-per-irq) / 8;
> 
> where bits-per-irq is the same as the last argument to for_each_dist_irq_reg()?
> 
>>      for_each_dist_irq_reg(irq, s->num_irq, 8) {
>>          kvm_gicd_access(s, offset, &reg, false);
>>          *field = reg;
>> @@ -149,7 +155,18 @@ static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
>>      uint32_t reg, *field;
>>      int irq;
>>
>> -    field = (uint32_t *)bmp;
>> +    if (!s->gicd_no_shift_bug) {
>> +        field = (uint32_t *)(bmp + 8 * sizeof(uint32_t));
>> +    } else {
>> +        field = (uint32_t *)bmp;
>> +    }
> 
> As noted above, don't try to fix the migration compat problem up here.
> The code for syncing registers should just assume the data structure
> is being used correctly.
> 
>> @@ -235,6 +274,20 @@ static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
>>      uint32_t reg;
>>      int irq;
>>
>> +    if (!s->gicd_no_shift_bug) {
>> +        bmp += (1 * sizeof(uint32_t));
>> +    }
> 
> As noted above, don't try to fix the migration compat problem up here.
> The code for syncing registers should just assume the data structure
> is being used correctly.
> 
>> +
>> +    /* For the KVM GICv3, affinity routing is always enabled, and the
>> +     * GICD_IGROUPR0/GICD_ISENABLER0/GICD_ISPENDR0/GICD_ISACTIVER0 registers
>> +     * are always RAZ/WI. The corresponding functionality is replaced by the
>> +     * GICR registers. So it doesn't need to sync them.
>> +     */
>> +    offset += (1 * sizeof(uint32_t));
>> +    if (clroffset != 0) {
>> +        clroffset += (1 * sizeof(uint32_t));
>> +    }
>> +
>>      for_each_dist_irq_reg(irq, s->num_irq, 1) {
>>          /* If this bitmap is a set/clear register pair, first write to the
>>           * clear-reg to clear all bits before using the set-reg to write
>> @@ -651,6 +704,7 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
>>          return;
>>      }
>>
>> +    s->gicd_no_shift_bug = 1;
> 
> This is the wrong place to do this reset, because then it won't
> be in effect for the TCG GIC.
> 
>>      kvm_arm_gicv3_put(s);
>>  }
>>
>> diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
>> index bccdfe1..13c28c0 100644
>> --- a/include/hw/intc/arm_gicv3_common.h
>> +++ b/include/hw/intc/arm_gicv3_common.h
>> @@ -217,6 +217,7 @@ struct GICv3State {
>>      uint32_t revision;
>>      bool security_extn;
>>      bool irq_reset_nonsecure;
>> +    bool gicd_no_shift_bug;
> 
> Probably putting 'migration' in the field name will help to indicate
> that it's only a migration compat fixup.
> 
How about gicd_no_migration_shift_bug?

>>
>>      int dev_fd; /* kvm device fd if backed by kvm vgic support */
>>      Error *migration_blocker;
>> --
>> 2.0.4
> 
> thanks
> -- PMM
> 
> .
> 

Thanks,
diff mbox series

Patch

diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 7b54d52..f93e5d2 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -141,6 +141,38 @@  static const VMStateDescription vmstate_gicv3_cpu = {
     }
 };
 
+static int gicv3_gicd_no_shift_bug_pre_load(void *opaque)
+{
+    GICv3State *cs = opaque;
+
+   /*
+    * If the gicd_no_shift_bug subsection is not transferred this
+    * means gicd_no_shift_bug is 0x0 (which might not be the same as
+    * our reset value).
+    */
+    cs->gicd_no_shift_bug = 0x0;
+    return 0;
+}
+
+static bool gicv3_gicd_no_shift_bug_needed(void *opaque)
+{
+    GICv3State *cs = opaque;
+
+    return cs->gicd_no_shift_bug;
+}
+
+const VMStateDescription vmstate_gicv3_gicd_no_shift_bug = {
+    .name = "arm_gicv3/gicd_no_shift_bug",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_load = gicv3_gicd_no_shift_bug_pre_load,
+    .needed = gicv3_gicd_no_shift_bug_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(gicd_no_shift_bug, GICv3State),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_gicv3 = {
     .name = "arm_gicv3",
     .version_id = 1,
@@ -165,6 +197,10 @@  static const VMStateDescription vmstate_gicv3 = {
         VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, GICv3State, num_cpu,
                                              vmstate_gicv3_cpu, GICv3CPUState),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_gicv3_gicd_no_shift_bug,
+        NULL
     }
 };
 
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 3536795..bd961f1 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -136,6 +136,12 @@  static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
     int irq;
 
     field = (uint32_t *)bmp;
+    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
+     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
+     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
+     * sync them.
+     */
+    offset += (8 * sizeof(uint32_t));
     for_each_dist_irq_reg(irq, s->num_irq, 8) {
         kvm_gicd_access(s, offset, &reg, false);
         *field = reg;
@@ -149,7 +155,18 @@  static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
     uint32_t reg, *field;
     int irq;
 
-    field = (uint32_t *)bmp;
+    if (!s->gicd_no_shift_bug) {
+        field = (uint32_t *)(bmp + 8 * sizeof(uint32_t));
+    } else {
+        field = (uint32_t *)bmp;
+    }
+
+    /* For the KVM GICv3, affinity routing is always enabled, and the first 8
+     * GICD_IPRIORITYR<n> registers are always RAZ/WI. The corresponding
+     * functionality is replaced by GICR_IPRIORITYR<n>. So it doesn't need to
+     * sync them.
+     */
+    offset += (8 * sizeof(uint32_t));
     for_each_dist_irq_reg(irq, s->num_irq, 8) {
         reg = *field;
         kvm_gicd_access(s, offset, &reg, true);
@@ -164,6 +181,12 @@  static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset,
     uint32_t reg;
     int irq;
 
+    /* For the KVM GICv3, affinity routing is always enabled, and the first 2
+     * GICD_ICFGR<n> registers are always RAZ/WI. The corresponding
+     * functionality is replaced by GICR_ICFGR<n>. So it doesn't need to sync
+     * them.
+     */
+    offset += (2 * sizeof(uint32_t));
     for_each_dist_irq_reg(irq, s->num_irq, 2) {
         kvm_gicd_access(s, offset, &reg, false);
         reg = half_unshuffle32(reg >> 1);
@@ -181,6 +204,16 @@  static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset,
     uint32_t reg;
     int irq;
 
+    if (!s->gicd_no_shift_bug) {
+        bmp += (2 * sizeof(uint32_t));
+    }
+
+    /* For the KVM GICv3, affinity routing is always enabled, and the first 2
+     * GICD_ICFGR<n> registers are always RAZ/WI. The corresponding
+     * functionality is replaced by GICR_ICFGR<n>. So it doesn't need to sync
+     * them.
+     */
+    offset += (2 * sizeof(uint32_t));
     for_each_dist_irq_reg(irq, s->num_irq, 2) {
         reg = *gic_bmp_ptr32(bmp, irq);
         if (irq % 32 != 0) {
@@ -222,6 +255,12 @@  static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp)
     uint32_t reg;
     int irq;
 
+    /* For the KVM GICv3, affinity routing is always enabled, and the
+     * GICD_IGROUPR0/GICD_ISENABLER0/GICD_ISPENDR0/GICD_ISACTIVER0 registers
+     * are always RAZ/WI. The corresponding functionality is replaced by the
+     * GICR registers. So it doesn't need to sync them.
+     */
+    offset += (1 * sizeof(uint32_t));
     for_each_dist_irq_reg(irq, s->num_irq, 1) {
         kvm_gicd_access(s, offset, &reg, false);
         *gic_bmp_ptr32(bmp, irq) = reg;
@@ -235,6 +274,20 @@  static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
     uint32_t reg;
     int irq;
 
+    if (!s->gicd_no_shift_bug) {
+        bmp += (1 * sizeof(uint32_t));
+    }
+
+    /* For the KVM GICv3, affinity routing is always enabled, and the
+     * GICD_IGROUPR0/GICD_ISENABLER0/GICD_ISPENDR0/GICD_ISACTIVER0 registers
+     * are always RAZ/WI. The corresponding functionality is replaced by the
+     * GICR registers. So it doesn't need to sync them.
+     */
+    offset += (1 * sizeof(uint32_t));
+    if (clroffset != 0) {
+        clroffset += (1 * sizeof(uint32_t));
+    }
+
     for_each_dist_irq_reg(irq, s->num_irq, 1) {
         /* If this bitmap is a set/clear register pair, first write to the
          * clear-reg to clear all bits before using the set-reg to write
@@ -651,6 +704,7 @@  static void kvm_arm_gicv3_reset(DeviceState *dev)
         return;
     }
 
+    s->gicd_no_shift_bug = 1;
     kvm_arm_gicv3_put(s);
 }
 
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index bccdfe1..13c28c0 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -217,6 +217,7 @@  struct GICv3State {
     uint32_t revision;
     bool security_extn;
     bool irq_reset_nonsecure;
+    bool gicd_no_shift_bug;
 
     int dev_fd; /* kvm device fd if backed by kvm vgic support */
     Error *migration_blocker;