Patchwork [08/17] pseries: savevm support for PAPR TCE tables

login
register
mail settings
Submitter Alexey Kardashevskiy
Date June 27, 2013, 6:45 a.m.
Message ID <1372315560-5478-9-git-send-email-aik@ozlabs.ru>
Download mbox | patch
Permalink /patch/255000/
State New
Headers show

Comments

Alexey Kardashevskiy - June 27, 2013, 6:45 a.m.
From: David Gibson <david@gibson.dropbear.id.au>

This patch adds the necessary VMStateDescription information to save the
state of PAPR TCE tables (that is, the PAPR specified IOMMU).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 hw/ppc/spapr_iommu.c |   25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
Anthony Liguori - July 8, 2013, 6:39 p.m.
Alexey Kardashevskiy <aik@ozlabs.ru> writes:

> From: David Gibson <david@gibson.dropbear.id.au>
>
> This patch adds the necessary VMStateDescription information to save the
> state of PAPR TCE tables (that is, the PAPR specified IOMMU).
>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  hw/ppc/spapr_iommu.c |   25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
>
> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> index 91bc8e4..ba1f7b6 100644
> --- a/hw/ppc/spapr_iommu.c
> +++ b/hw/ppc/spapr_iommu.c
> @@ -112,6 +112,25 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
>      };
>  }
>  
> +static const VMStateDescription vmstate_spapr_tce_table = {
> +    .name = "spapr_iommu",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .minimum_version_id_old = 1,
> +    .fields      = (VMStateField []) {
> +        /* Sanity check */
> +        VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
> +        VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
> +
> +        /* IOMMU state */
> +        VMSTATE_BOOL(bypass, sPAPRTCETable),
> +        VMSTATE_VBUFFER_DIVIDE(table, sPAPRTCETable, 0, NULL, 0, window_size,
> +                               SPAPR_TCE_PAGE_SIZE / sizeof(sPAPRTCE)),

Not endian safe.  I really don't get the divide bit at all either.

> +
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
>  static MemoryRegionIOMMUOps spapr_iommu_ops = {
>      .translate = spapr_tce_translate_iommu,
>  };
> @@ -156,6 +175,8 @@ sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
>  
>      QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
>  
> +    vmstate_register(NULL, tcet->liobn, &vmstate_spapr_tce_table, tcet);
> +

If you need to add these, then you need to do more QOM conversion.

Regards,

Anthony Liguori

>      return tcet;
>  }
>  
> @@ -163,6 +184,10 @@ void spapr_tce_free(sPAPRTCETable *tcet)
>  {
>      QLIST_REMOVE(tcet, list);
>  
> +    vmstate_unregister(NULL, &vmstate_spapr_tce_table, tcet);
> +
> +    QLIST_REMOVE(tcet, list);
> +
>      if (!kvm_enabled() ||
>          (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
>                                   tcet->window_size) != 0)) {
> -- 
> 1.7.10.4
Benjamin Herrenschmidt - July 8, 2013, 9:45 p.m.
On Mon, 2013-07-08 at 13:39 -0500, Anthony Liguori wrote:
> > +    .fields      = (VMStateField []) {
> > +        /* Sanity check */
> > +        VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
> > +        VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
> > +
> > +        /* IOMMU state */
> > +        VMSTATE_BOOL(bypass, sPAPRTCETable),
> > +        VMSTATE_VBUFFER_DIVIDE(table, sPAPRTCETable, 0, NULL, 0, window_size,
> > +                               SPAPR_TCE_PAGE_SIZE / sizeof(sPAPRTCE)),
> 
> Not endian safe.  I really don't get the divide bit at all either.

What do you mean by not endian safe ? The TCE table is a well defined format,
it's always big endian regardless of the endianness of either host or guest.

Cheers,
Ben.
Anthony Liguori - July 8, 2013, 10:15 p.m.
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Mon, 2013-07-08 at 13:39 -0500, Anthony Liguori wrote:
>> > +    .fields      = (VMStateField []) {
>> > +        /* Sanity check */
>> > +        VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
>> > +        VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
>> > +
>> > +        /* IOMMU state */
>> > +        VMSTATE_BOOL(bypass, sPAPRTCETable),
>> > +        VMSTATE_VBUFFER_DIVIDE(table, sPAPRTCETable, 0, NULL, 0, window_size,
>> > +                               SPAPR_TCE_PAGE_SIZE / sizeof(sPAPRTCE)),
>> 
>> Not endian safe.  I really don't get the divide bit at all either.
>
> What do you mean by not endian safe ? The TCE table is a well defined format,
> it's always big endian regardless of the endianness of either host or
> guest.

VMSTATE_VBUFFER is essentially:

  write(fd, s->table, byte_size_of_table);

It treats whatever is given it as a sized data blob.

table is an array of sPAPRTCE which is just a struct wrapper around a
uint64_t value (the tce entry).

Those entries are set via the h_put_tce hcall through a simple
assignment:

> static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
>                                target_ulong tce)
> {
>     ...
> 
>     tcep = tcet->table + (ioba >> SPAPR_TCE_PAGE_SHIFT);
>     tcep->tce = tce;
>  ...
>
> static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>                               target_ulong opcode, target_ulong *args)
> {
>     ...
>     target_ulong tce = args[2];
>     sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
> 
>     ...
> 
>     if (tcet) {
>         return put_tce_emu(tcet, ioba, tce);
>     }

Hypercall arguments are passed in CPU endianness so what's being stored
in the tce table is CPU endianness.

Since VBUFFER just does a blind write() of the full array of uint64s,
what goes on the wire will be CPU endianness.

So if you do a savevm on a little endian host and loadvm on a big endian
host, badness ensues.

The proper thing to do is use a VARRAY instead of a VBUFFER.  VARRAY
will handle endian because it treats the data as an array, not as an
opaque buffer.

Regards,

Anthony Liguori

>
> Cheers,
> Ben.
Benjamin Herrenschmidt - July 8, 2013, 10:41 p.m.
On Mon, 2013-07-08 at 17:15 -0500, Anthony Liguori wrote:

> Hypercall arguments are passed in CPU endianness so what's being stored
> in the tce table is CPU endianness.
> 
> Since VBUFFER just does a blind write() of the full array of uint64s,
> what goes on the wire will be CPU endianness.
> 
> So if you do a savevm on a little endian host and loadvm on a big endian
> host, badness ensues.
> 
> The proper thing to do is use a VARRAY instead of a VBUFFER.  VARRAY
> will handle endian because it treats the data as an array, not as an
> opaque buffer.

Ok, so that's indeed an issue for emulated TCEs because what qemu stores
is not the real (BE) TCE table but a "host native" version of it. I see.

Cheers,
Ben.
David Gibson - July 9, 2013, 7:20 a.m.
On Mon, Jul 08, 2013 at 01:39:26PM -0500, Anthony Liguori wrote:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> 
> > From: David Gibson <david@gibson.dropbear.id.au>
> >
> > This patch adds the necessary VMStateDescription information to save the
> > state of PAPR TCE tables (that is, the PAPR specified IOMMU).
> >
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> > ---
> >  hw/ppc/spapr_iommu.c |   25 +++++++++++++++++++++++++
> >  1 file changed, 25 insertions(+)
> >
> > diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> > index 91bc8e4..ba1f7b6 100644
> > --- a/hw/ppc/spapr_iommu.c
> > +++ b/hw/ppc/spapr_iommu.c
> > @@ -112,6 +112,25 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
> >      };
> >  }
> >  
> > +static const VMStateDescription vmstate_spapr_tce_table = {
> > +    .name = "spapr_iommu",
> > +    .version_id = 1,
> > +    .minimum_version_id = 1,
> > +    .minimum_version_id_old = 1,
> > +    .fields      = (VMStateField []) {
> > +        /* Sanity check */
> > +        VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
> > +        VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
> > +
> > +        /* IOMMU state */
> > +        VMSTATE_BOOL(bypass, sPAPRTCETable),
> > +        VMSTATE_VBUFFER_DIVIDE(table, sPAPRTCETable, 0, NULL, 0, window_size,
> > +                               SPAPR_TCE_PAGE_SIZE / sizeof(sPAPRTCE)),
> 
> Not endian safe.  I really don't get the divide bit at all either.

So, the actual bug is that we're currently storing the TCE table
native endian, whereas it should be stored big endian always.
 
> > +
> > +        VMSTATE_END_OF_LIST()
> > +    },
> > +};
> > +
> >  static MemoryRegionIOMMUOps spapr_iommu_ops = {
> >      .translate = spapr_tce_translate_iommu,
> >  };
> > @@ -156,6 +175,8 @@ sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
> >  
> >      QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
> >  
> > +    vmstate_register(NULL, tcet->liobn, &vmstate_spapr_tce_table, tcet);
> > +
> 
> If you need to add these, then you need to do more QOM conversion.

Again, it's not clear how this should be QOMed.  Child of the device
constructing the TCE table?  But since that can often be a bus bridge,
wouldn't the TCE table instances get confused with the real bus
devices?
Anthony Liguori - July 9, 2013, 3:22 p.m.
David Gibson <david@gibson.dropbear.id.au> writes:

> On Mon, Jul 08, 2013 at 01:39:26PM -0500, Anthony Liguori wrote:
>> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>> 
>> > From: David Gibson <david@gibson.dropbear.id.au>
>> >
>> > This patch adds the necessary VMStateDescription information to save the
>> > state of PAPR TCE tables (that is, the PAPR specified IOMMU).
>> >
>> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>> > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> > ---
>> >  hw/ppc/spapr_iommu.c |   25 +++++++++++++++++++++++++
>> >  1 file changed, 25 insertions(+)
>> >
>> > diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>> > index 91bc8e4..ba1f7b6 100644
>> > --- a/hw/ppc/spapr_iommu.c
>> > +++ b/hw/ppc/spapr_iommu.c
>> > @@ -112,6 +112,25 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
>> >      };
>> >  }
>> >  
>> > +static const VMStateDescription vmstate_spapr_tce_table = {
>> > +    .name = "spapr_iommu",
>> > +    .version_id = 1,
>> > +    .minimum_version_id = 1,
>> > +    .minimum_version_id_old = 1,
>> > +    .fields      = (VMStateField []) {
>> > +        /* Sanity check */
>> > +        VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
>> > +        VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
>> > +
>> > +        /* IOMMU state */
>> > +        VMSTATE_BOOL(bypass, sPAPRTCETable),
>> > +        VMSTATE_VBUFFER_DIVIDE(table, sPAPRTCETable, 0, NULL, 0, window_size,
>> > +                               SPAPR_TCE_PAGE_SIZE / sizeof(sPAPRTCE)),
>> 
>> Not endian safe.  I really don't get the divide bit at all either.
>
> So, the actual bug is that we're currently storing the TCE table
> native endian, whereas it should be stored big endian always.

Why?  There are no guest visible byte accesses done to the table
AFAICT.  Everything is done as words and there's quite a lot of math
done to the entries.

It seems like native endian is the right internal representation.

>  
>> > +
>> > +        VMSTATE_END_OF_LIST()
>> > +    },
>> > +};
>> > +
>> >  static MemoryRegionIOMMUOps spapr_iommu_ops = {
>> >      .translate = spapr_tce_translate_iommu,
>> >  };
>> > @@ -156,6 +175,8 @@ sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
>> >  
>> >      QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
>> >  
>> > +    vmstate_register(NULL, tcet->liobn, &vmstate_spapr_tce_table, tcet);
>> > +
>> 
>> If you need to add these, then you need to do more QOM conversion.
>
> Again, it's not clear how this should be QOMed.  Child of the device
> constructing the TCE table?  But since that can often be a bus bridge,
> wouldn't the TCE table instances get confused with the real bus
> devices.

I can't apply this series (I'm not sure what tree it's against), but if
Alexey pushes a branch somewhere I can do the QOM conversions to
demonstrate.

Regards,

Anthony Liguori

>
> -- 
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson
Anthony Liguori - July 9, 2013, 4:26 p.m.
David Gibson <david@gibson.dropbear.id.au> writes:

> On Mon, Jul 08, 2013 at 01:39:26PM -0500, Anthony Liguori wrote:
>> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>> 
>> > From: David Gibson <david@gibson.dropbear.id.au>
>> >
>> > This patch adds the necessary VMStateDescription information to save the
>> > state of PAPR TCE tables (that is, the PAPR specified IOMMU).
>> >
>> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>> > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> > ---
>> >  hw/ppc/spapr_iommu.c |   25 +++++++++++++++++++++++++
>> >  1 file changed, 25 insertions(+)
>> >
>> > diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>> > index 91bc8e4..ba1f7b6 100644
>> > --- a/hw/ppc/spapr_iommu.c
>> > +++ b/hw/ppc/spapr_iommu.c
>> > @@ -112,6 +112,25 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
>> >      };
>> >  }
>> >  
>> > +static const VMStateDescription vmstate_spapr_tce_table = {
>> > +    .name = "spapr_iommu",
>> > +    .version_id = 1,
>> > +    .minimum_version_id = 1,
>> > +    .minimum_version_id_old = 1,
>> > +    .fields      = (VMStateField []) {
>> > +        /* Sanity check */
>> > +        VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
>> > +        VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
>> > +
>> > +        /* IOMMU state */
>> > +        VMSTATE_BOOL(bypass, sPAPRTCETable),
>> > +        VMSTATE_VBUFFER_DIVIDE(table, sPAPRTCETable, 0, NULL, 0, window_size,
>> > +                               SPAPR_TCE_PAGE_SIZE / sizeof(sPAPRTCE)),
>> 
>> Not endian safe.  I really don't get the divide bit at all either.
>
> So, the actual bug is that we're currently storing the TCE table
> native endian, whereas it should be stored big endian always.
>  
>> > +
>> > +        VMSTATE_END_OF_LIST()
>> > +    },
>> > +};
>> > +
>> >  static MemoryRegionIOMMUOps spapr_iommu_ops = {
>> >      .translate = spapr_tce_translate_iommu,
>> >  };
>> > @@ -156,6 +175,8 @@ sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
>> >  
>> >      QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
>> >  
>> > +    vmstate_register(NULL, tcet->liobn, &vmstate_spapr_tce_table, tcet);
>> > +
>> 
>> If you need to add these, then you need to do more QOM conversion.
>
> Again, it's not clear how this should be QOMed.  Child of the device
> constructing the TCE table?  But since that can often be a bus bridge,
> wouldn't the TCE table instances get confused with the real bus
> devices.

Only build tested.

https://github.com/aliguori/qemu/commit/a47a391c875a69f203110811c730877da12f5b14

I'll put together a patch series once I have a chance to test properly.

Regards,

Anthony Liguori

>
> -- 
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson
David Gibson - July 10, 2013, 7:42 a.m.
On Tue, Jul 09, 2013 at 10:22:39AM -0500, Anthony Liguori wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > On Mon, Jul 08, 2013 at 01:39:26PM -0500, Anthony Liguori wrote:
> >> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> >> 
> >> > From: David Gibson <david@gibson.dropbear.id.au>
> >> >
> >> > This patch adds the necessary VMStateDescription information to save the
> >> > state of PAPR TCE tables (that is, the PAPR specified IOMMU).
> >> >
> >> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> >> > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >> > ---
> >> >  hw/ppc/spapr_iommu.c |   25 +++++++++++++++++++++++++
> >> >  1 file changed, 25 insertions(+)
> >> >
> >> > diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> >> > index 91bc8e4..ba1f7b6 100644
> >> > --- a/hw/ppc/spapr_iommu.c
> >> > +++ b/hw/ppc/spapr_iommu.c
> >> > @@ -112,6 +112,25 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
> >> >      };
> >> >  }
> >> >  
> >> > +static const VMStateDescription vmstate_spapr_tce_table = {
> >> > +    .name = "spapr_iommu",
> >> > +    .version_id = 1,
> >> > +    .minimum_version_id = 1,
> >> > +    .minimum_version_id_old = 1,
> >> > +    .fields      = (VMStateField []) {
> >> > +        /* Sanity check */
> >> > +        VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
> >> > +        VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
> >> > +
> >> > +        /* IOMMU state */
> >> > +        VMSTATE_BOOL(bypass, sPAPRTCETable),
> >> > +        VMSTATE_VBUFFER_DIVIDE(table, sPAPRTCETable, 0, NULL, 0, window_size,
> > >> > +                               SPAPR_TCE_PAGE_SIZE / sizeof(sPAPRTCE)),
> >> 
> >> Not endian safe.  I really don't get the divide bit at all either.
> >
> > So, the actual bug is that we're currently storing the TCE table
> > > native endian, whereas it should be stored big endian always.
> 
> Why?  There are no guest visible byte accesses done to the table
> AFAICT.  Everything is done as words and there's quite a lot of math
> done to the entries.
> 
> It seems like native endian is the right internal representation.

Hrm.  I suppose it could be fixed at either end.  The idea was that
the table array would contain exactly the same bytes as would be
present in physical memory on a real bare-metal system, which seems
like a generally nice property.
Paolo Bonzini - July 15, 2013, 1:26 p.m.
Il 08/07/2013 20:39, Anthony Liguori ha scritto:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> 
>> From: David Gibson <david@gibson.dropbear.id.au>
>>
>> This patch adds the necessary VMStateDescription information to save the
>> state of PAPR TCE tables (that is, the PAPR specified IOMMU).
>>
>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>  hw/ppc/spapr_iommu.c |   25 +++++++++++++++++++++++++
>>  1 file changed, 25 insertions(+)
>>
>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>> index 91bc8e4..ba1f7b6 100644
>> --- a/hw/ppc/spapr_iommu.c
>> +++ b/hw/ppc/spapr_iommu.c
>> @@ -112,6 +112,25 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
>>      };
>>  }
>>  
>> +static const VMStateDescription vmstate_spapr_tce_table = {
>> +    .name = "spapr_iommu",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .minimum_version_id_old = 1,
>> +    .fields      = (VMStateField []) {
>> +        /* Sanity check */
>> +        VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
>> +        VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
>> +
>> +        /* IOMMU state */
>> +        VMSTATE_BOOL(bypass, sPAPRTCETable),
>> +        VMSTATE_VBUFFER_DIVIDE(table, sPAPRTCETable, 0, NULL, 0, window_size,
>> +                               SPAPR_TCE_PAGE_SIZE / sizeof(sPAPRTCE)),
> 
> Not endian safe.  I really don't get the divide bit at all either.
> 
>> +
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
>> +
>>  static MemoryRegionIOMMUOps spapr_iommu_ops = {
>>      .translate = spapr_tce_translate_iommu,
>>  };
>> @@ -156,6 +175,8 @@ sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
>>  
>>      QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
>>  
>> +    vmstate_register(NULL, tcet->liobn, &vmstate_spapr_tce_table, tcet);
>> +
> 
> If you need to add these, then you need to do more QOM conversion.

No, this does not need QOM conversion.  It needs a sub-vmstate, that is
then used by both the PCI and VIO bridges via VMSTATE_STRUCT.

Paolo

> Regards,
> 
> Anthony Liguori
> 
>>      return tcet;
>>  }
>>  
>> @@ -163,6 +184,10 @@ void spapr_tce_free(sPAPRTCETable *tcet)
>>  {
>>      QLIST_REMOVE(tcet, list);
>>  
>> +    vmstate_unregister(NULL, &vmstate_spapr_tce_table, tcet);
>> +
>> +    QLIST_REMOVE(tcet, list);
>> +
>>      if (!kvm_enabled() ||
>>          (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
>>                                   tcet->window_size) != 0)) {
>> -- 
>> 1.7.10.4
>
Anthony Liguori - July 15, 2013, 3:06 p.m.
Paolo Bonzini <pbonzini@redhat.com> writes:

> Il 08/07/2013 20:39, Anthony Liguori ha scritto:
>> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>> 
>>> From: David Gibson <david@gibson.dropbear.id.au>
>>>
>>> This patch adds the necessary VMStateDescription information to save the
>>> state of PAPR TCE tables (that is, the PAPR specified IOMMU).
>>>
>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>> ---
>>>  hw/ppc/spapr_iommu.c |   25 +++++++++++++++++++++++++
>>>  1 file changed, 25 insertions(+)
>>>
>>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>>> index 91bc8e4..ba1f7b6 100644
>>> --- a/hw/ppc/spapr_iommu.c
>>> +++ b/hw/ppc/spapr_iommu.c
>>> @@ -112,6 +112,25 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
>>>      };
>>>  }
>>>  
>>> +static const VMStateDescription vmstate_spapr_tce_table = {
>>> +    .name = "spapr_iommu",
>>> +    .version_id = 1,
>>> +    .minimum_version_id = 1,
>>> +    .minimum_version_id_old = 1,
>>> +    .fields      = (VMStateField []) {
>>> +        /* Sanity check */
>>> +        VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
>>> +        VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
>>> +
>>> +        /* IOMMU state */
>>> +        VMSTATE_BOOL(bypass, sPAPRTCETable),
>>> +        VMSTATE_VBUFFER_DIVIDE(table, sPAPRTCETable, 0, NULL, 0, window_size,
>>> +                               SPAPR_TCE_PAGE_SIZE / sizeof(sPAPRTCE)),
>> 
>> Not endian safe.  I really don't get the divide bit at all either.
>> 
>>> +
>>> +        VMSTATE_END_OF_LIST()
>>> +    },
>>> +};
>>> +
>>>  static MemoryRegionIOMMUOps spapr_iommu_ops = {
>>>      .translate = spapr_tce_translate_iommu,
>>>  };
>>> @@ -156,6 +175,8 @@ sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
>>>  
>>>      QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
>>>  
>>> +    vmstate_register(NULL, tcet->liobn, &vmstate_spapr_tce_table, tcet);
>>> +
>> 
>> If you need to add these, then you need to do more QOM conversion.
>
> No, this does not need QOM conversion.  It needs a sub-vmstate, that is
> then used by both the PCI and VIO bridges via VMSTATE_STRUCT.

I already QOM converted it and made it a sub-object.

I think that's better from a modeling point of view than using a
sub-vmstate.

Patches coming shortly.

Regards,

Anthony Liguori

>
> Paolo
>
>> Regards,
>> 
>> Anthony Liguori
>> 
>>>      return tcet;
>>>  }
>>>  
>>> @@ -163,6 +184,10 @@ void spapr_tce_free(sPAPRTCETable *tcet)
>>>  {
>>>      QLIST_REMOVE(tcet, list);
>>>  
>>> +    vmstate_unregister(NULL, &vmstate_spapr_tce_table, tcet);
>>> +
>>> +    QLIST_REMOVE(tcet, list);
>>> +
>>>      if (!kvm_enabled() ||
>>>          (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
>>>                                   tcet->window_size) != 0)) {
>>> -- 
>>> 1.7.10.4
>>

Patch

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 91bc8e4..ba1f7b6 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -112,6 +112,25 @@  static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
     };
 }
 
+static const VMStateDescription vmstate_spapr_tce_table = {
+    .name = "spapr_iommu",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField []) {
+        /* Sanity check */
+        VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
+        VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
+
+        /* IOMMU state */
+        VMSTATE_BOOL(bypass, sPAPRTCETable),
+        VMSTATE_VBUFFER_DIVIDE(table, sPAPRTCETable, 0, NULL, 0, window_size,
+                               SPAPR_TCE_PAGE_SIZE / sizeof(sPAPRTCE)),
+
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static MemoryRegionIOMMUOps spapr_iommu_ops = {
     .translate = spapr_tce_translate_iommu,
 };
@@ -156,6 +175,8 @@  sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
 
     QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
+    vmstate_register(NULL, tcet->liobn, &vmstate_spapr_tce_table, tcet);
+
     return tcet;
 }
 
@@ -163,6 +184,10 @@  void spapr_tce_free(sPAPRTCETable *tcet)
 {
     QLIST_REMOVE(tcet, list);
 
+    vmstate_unregister(NULL, &vmstate_spapr_tce_table, tcet);
+
+    QLIST_REMOVE(tcet, list);
+
     if (!kvm_enabled() ||
         (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
                                  tcet->window_size) != 0)) {