diff mbox

[v6,07/15] spapr_rtas: add ibm, configure-connector RTAS interface

Message ID 1425006675-19976-8-git-send-email-mdroth@linux.vnet.ibm.com
State New
Headers show

Commit Message

Michael Roth Feb. 27, 2015, 3:11 a.m. UTC
This interface is used to fetch an OF device-tree nodes that describes a
newly-attached device to guest. It is called multiple times to walk the
device-tree node and fetch individual properties into a 'workarea'/buffer
provided by the guest.

The device-tree is generated by QEMU and passed to an sPAPRDRConnector during
the initial hotplug operation, and the state of these RTAS calls is tracked by
the sPAPRDRConnector. When the last of these properties is successfully
fetched, we report as special return value to the guest and transition
the device to a 'configured' state on the QEMU/DRC side.

See docs/specs/ppc-spapr-hotplug.txt for a complete description of
this interface.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
 hw/ppc/spapr_rtas.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

Comments

David Gibson March 2, 2015, 7:02 a.m. UTC | #1
On Thu, Feb 26, 2015 at 09:11:07PM -0600, Michael Roth wrote:
> This interface is used to fetch an OF device-tree nodes that describes a
> newly-attached device to guest. It is called multiple times to walk the
> device-tree node and fetch individual properties into a 'workarea'/buffer
> provided by the guest.
> 
> The device-tree is generated by QEMU and passed to an sPAPRDRConnector during
> the initial hotplug operation, and the state of these RTAS calls is tracked by
> the sPAPRDRConnector. When the last of these properties is successfully
> fetched, we report as special return value to the guest and transition
> the device to a 'configured' state on the QEMU/DRC side.
> 
> See docs/specs/ppc-spapr-hotplug.txt for a complete description of
> this interface.
> 
> Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>


So, actually, here's probably the best place to explain what I had in
mind for changing the internal interface for this stuff.  I was
thinking something like this pseudocode:

struct DRCCCState {
	void *fdt;
	int offset;
	int depth;
};

rtas_configure_connector()
{
	...
	DRCCCState *ccstate;
	...

	/* check parameters, retrieve drc */
	ccstate = drc->ccstate;

	if (!ccstate) {
		/* Haven't started configuring yet */
		ccstate = malloc(...);
		/* Retrieve the dt fragment from the backend */
		ccstate->fdt = drck->get_dt(...);
		ccstate->offset = 0;
	}

	while (get next tag from fdt) {
		switch (tag)
		case FDT_PROPERTY:
	      		/* Translate property into rtas return values */
			return SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;

		/* other cases ... */
	}
	
	/* Fall through only if we've completed streaming out the dt
	*/

	 /* Tell the back end we've finished configuring */
	drck->cc_completed(...);
	return SPAPR_DR_CC_RESPONSE_SUCCESS;
}

On reset, or anything else which interrupts the configuration process,
just blow away drc->ccstate.


> ---
>  hw/ppc/spapr_rtas.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 88 insertions(+)
> 
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index f80beb2..31ad35f 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -418,6 +418,92 @@ static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>      rtas_st(rets, 1, entity_sense);
>  }
>  
> +/* configure-connector work area offsets, int32_t units for field
> + * indexes, bytes for field offset/len values.
> + *
> + * as documented by PAPR+ v2.7, 13.5.3.5
> + */
> +#define CC_IDX_NODE_NAME_OFFSET 2
> +#define CC_IDX_PROP_NAME_OFFSET 2
> +#define CC_IDX_PROP_LEN 3
> +#define CC_IDX_PROP_DATA_OFFSET 4
> +#define CC_VAL_DATA_OFFSET ((CC_IDX_PROP_DATA_OFFSET + 1) * 4)
> +#define CC_WA_LEN 4096
> +
> +static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
> +                                         sPAPREnvironment *spapr,
> +                                         uint32_t token, uint32_t nargs,
> +                                         target_ulong args, uint32_t nret,
> +                                         target_ulong rets)
> +{
> +    uint64_t wa_addr;
> +    uint64_t wa_offset;
> +    uint32_t drc_index;
> +    sPAPRDRConnector *drc;
> +    sPAPRDRConnectorClass *drck;
> +    sPAPRDRCCResponse resp;
> +    const struct fdt_property *prop = NULL;
> +    char *prop_name = NULL;
> +    int prop_len, rc;
> +
> +    if (nargs != 2 || nret != 1) {
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +        return;
> +    }
> +
> +    wa_addr = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 0);
> +
> +    drc_index = rtas_ld(wa_addr, 0);
> +    drc = spapr_dr_connector_by_index(drc_index);
> +    if (!drc) {
> +        DPRINTF("rtas_ibm_configure_connector: invalid sensor/DRC index: %xh\n",
> +                drc_index);
> +        rc = RTAS_OUT_PARAM_ERROR;
> +        goto out;
> +    }
> +    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +    resp = drck->configure_connector(drc, &prop_name, &prop, &prop_len);
> +
> +    switch (resp) {
> +    case SPAPR_DR_CC_RESPONSE_NEXT_CHILD:
> +        /* provide the name of the next OF node */
> +        wa_offset = CC_VAL_DATA_OFFSET;
> +        rtas_st(wa_addr, CC_IDX_NODE_NAME_OFFSET, wa_offset);
> +        rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset,
> +                              (uint8_t *)prop_name, strlen(prop_name) + 1);
> +        break;
> +    case SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY:
> +        /* provide the name of the next OF property */
> +        wa_offset = CC_VAL_DATA_OFFSET;
> +        rtas_st(wa_addr, CC_IDX_PROP_NAME_OFFSET, wa_offset);
> +        rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset,
> +                              (uint8_t *)prop_name, strlen(prop_name) + 1);
> +
> +        /* provide the length and value of the OF property. data gets placed
> +         * immediately after NULL terminator of the OF property's name string
> +         */
> +        wa_offset += strlen(prop_name) + 1,
> +        rtas_st(wa_addr, CC_IDX_PROP_LEN, prop_len);
> +        rtas_st(wa_addr, CC_IDX_PROP_DATA_OFFSET, wa_offset);
> +        rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset,
> +                              (uint8_t *)((struct fdt_property *)prop)->data,
> +                              prop_len);
> +        break;
> +    case SPAPR_DR_CC_RESPONSE_PREV_PARENT:
> +    case SPAPR_DR_CC_RESPONSE_ERROR:
> +    case SPAPR_DR_CC_RESPONSE_SUCCESS:
> +        break;
> +    default:
> +        /* drck->configure_connector() should not return anything else */
> +        g_assert(false);
> +    }
> +
> +    rc = resp;
> +out:
> +    g_free(prop_name);
> +    rtas_st(rets, 0, rc);
> +}
> +
>  static struct rtas_call {
>      const char *name;
>      spapr_rtas_fn fn;
> @@ -551,6 +637,8 @@ static void core_rtas_register_types(void)
>                          rtas_set_indicator);
>      spapr_rtas_register(RTAS_GET_SENSOR_STATE, "get-sensor-state",
>                          rtas_get_sensor_state);
> +    spapr_rtas_register(RTAS_IBM_CONFIGURE_CONNECTOR, "ibm,configure-connector",
> +                        rtas_ibm_configure_connector);
>  }
>  
>  type_init(core_rtas_register_types)
Michael Roth March 3, 2015, 4:40 a.m. UTC | #2
Quoting David Gibson (2015-03-02 01:02:46)
> On Thu, Feb 26, 2015 at 09:11:07PM -0600, Michael Roth wrote:
> > This interface is used to fetch an OF device-tree nodes that describes a
> > newly-attached device to guest. It is called multiple times to walk the
> > device-tree node and fetch individual properties into a 'workarea'/buffer
> > provided by the guest.
> > 
> > The device-tree is generated by QEMU and passed to an sPAPRDRConnector during
> > the initial hotplug operation, and the state of these RTAS calls is tracked by
> > the sPAPRDRConnector. When the last of these properties is successfully
> > fetched, we report as special return value to the guest and transition
> > the device to a 'configured' state on the QEMU/DRC side.
> > 
> > See docs/specs/ppc-spapr-hotplug.txt for a complete description of
> > this interface.
> > 
> > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> 
> 
> So, actually, here's probably the best place to explain what I had in
> mind for changing the internal interface for this stuff.  I was
> thinking something like this pseudocode:
> 
> struct DRCCCState {
>         void *fdt;
>         int offset;
>         int depth;
> };
> 
> rtas_configure_connector()
> {
>         ...
>         DRCCCState *ccstate;
>         ...
> 
>         /* check parameters, retrieve drc */
>         ccstate = drc->ccstate;
> 
>         if (!ccstate) {
>                 /* Haven't started configuring yet */
>                 ccstate = malloc(...);
>                 /* Retrieve the dt fragment from the backend */
>                 ccstate->fdt = drck->get_dt(...);
>                 ccstate->offset = 0;
>         }
> 
>         while (get next tag from fdt) {
>                 switch (tag)
>                 case FDT_PROPERTY:
>                         /* Translate property into rtas return values */
>                         return SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
> 
>                 /* other cases ... */
>         }
>         
>         /* Fall through only if we've completed streaming out the dt
>         */
> 
>          /* Tell the back end we've finished configuring */
>         drck->cc_completed(...);
>         return SPAPR_DR_CC_RESPONSE_SUCCESS;
> }
> 
> On reset, or anything else which interrupts the configuration process,
> just blow away drc->ccstate.

Ok, that seems reasonable. I took a stab at it here:

    https://github.com/mdroth/qemu/commit/79ce372743da1b63a6fa33e3de1f1daba8ea1fdc
    https://github.com/mdroth/qemu/commits/spapr-hotplug-pci

It exposes the ccstate as you suggested, via drck->get_cc_state(), and in
place of drck->cc_completed() I have drck->set_configured() which serves
roughly the same purpose I think. I opted not to let RTAS handle
allocation, since it seemed to imply RTAS owns it and not the DRC.

I plan to squash these into v7, but wanted to run the patch by you first.
Let me know if you'd rather I just post v7.

Thanks!

> 
> 
> > ---
> >  hw/ppc/spapr_rtas.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 88 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> > index f80beb2..31ad35f 100644
> > --- a/hw/ppc/spapr_rtas.c
> > +++ b/hw/ppc/spapr_rtas.c
> > @@ -418,6 +418,92 @@ static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr,
> >      rtas_st(rets, 1, entity_sense);
> >  }
> >  
> > +/* configure-connector work area offsets, int32_t units for field
> > + * indexes, bytes for field offset/len values.
> > + *
> > + * as documented by PAPR+ v2.7, 13.5.3.5
> > + */
> > +#define CC_IDX_NODE_NAME_OFFSET 2
> > +#define CC_IDX_PROP_NAME_OFFSET 2
> > +#define CC_IDX_PROP_LEN 3
> > +#define CC_IDX_PROP_DATA_OFFSET 4
> > +#define CC_VAL_DATA_OFFSET ((CC_IDX_PROP_DATA_OFFSET + 1) * 4)
> > +#define CC_WA_LEN 4096
> > +
> > +static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
> > +                                         sPAPREnvironment *spapr,
> > +                                         uint32_t token, uint32_t nargs,
> > +                                         target_ulong args, uint32_t nret,
> > +                                         target_ulong rets)
> > +{
> > +    uint64_t wa_addr;
> > +    uint64_t wa_offset;
> > +    uint32_t drc_index;
> > +    sPAPRDRConnector *drc;
> > +    sPAPRDRConnectorClass *drck;
> > +    sPAPRDRCCResponse resp;
> > +    const struct fdt_property *prop = NULL;
> > +    char *prop_name = NULL;
> > +    int prop_len, rc;
> > +
> > +    if (nargs != 2 || nret != 1) {
> > +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> > +        return;
> > +    }
> > +
> > +    wa_addr = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 0);
> > +
> > +    drc_index = rtas_ld(wa_addr, 0);
> > +    drc = spapr_dr_connector_by_index(drc_index);
> > +    if (!drc) {
> > +        DPRINTF("rtas_ibm_configure_connector: invalid sensor/DRC index: %xh\n",
> > +                drc_index);
> > +        rc = RTAS_OUT_PARAM_ERROR;
> > +        goto out;
> > +    }
> > +    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> > +    resp = drck->configure_connector(drc, &prop_name, &prop, &prop_len);
> > +
> > +    switch (resp) {
> > +    case SPAPR_DR_CC_RESPONSE_NEXT_CHILD:
> > +        /* provide the name of the next OF node */
> > +        wa_offset = CC_VAL_DATA_OFFSET;
> > +        rtas_st(wa_addr, CC_IDX_NODE_NAME_OFFSET, wa_offset);
> > +        rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset,
> > +                              (uint8_t *)prop_name, strlen(prop_name) + 1);
> > +        break;
> > +    case SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY:
> > +        /* provide the name of the next OF property */
> > +        wa_offset = CC_VAL_DATA_OFFSET;
> > +        rtas_st(wa_addr, CC_IDX_PROP_NAME_OFFSET, wa_offset);
> > +        rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset,
> > +                              (uint8_t *)prop_name, strlen(prop_name) + 1);
> > +
> > +        /* provide the length and value of the OF property. data gets placed
> > +         * immediately after NULL terminator of the OF property's name string
> > +         */
> > +        wa_offset += strlen(prop_name) + 1,
> > +        rtas_st(wa_addr, CC_IDX_PROP_LEN, prop_len);
> > +        rtas_st(wa_addr, CC_IDX_PROP_DATA_OFFSET, wa_offset);
> > +        rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset,
> > +                              (uint8_t *)((struct fdt_property *)prop)->data,
> > +                              prop_len);
> > +        break;
> > +    case SPAPR_DR_CC_RESPONSE_PREV_PARENT:
> > +    case SPAPR_DR_CC_RESPONSE_ERROR:
> > +    case SPAPR_DR_CC_RESPONSE_SUCCESS:
> > +        break;
> > +    default:
> > +        /* drck->configure_connector() should not return anything else */
> > +        g_assert(false);
> > +    }
> > +
> > +    rc = resp;
> > +out:
> > +    g_free(prop_name);
> > +    rtas_st(rets, 0, rc);
> > +}
> > +
> >  static struct rtas_call {
> >      const char *name;
> >      spapr_rtas_fn fn;
> > @@ -551,6 +637,8 @@ static void core_rtas_register_types(void)
> >                          rtas_set_indicator);
> >      spapr_rtas_register(RTAS_GET_SENSOR_STATE, "get-sensor-state",
> >                          rtas_get_sensor_state);
> > +    spapr_rtas_register(RTAS_IBM_CONFIGURE_CONNECTOR, "ibm,configure-connector",
> > +                        rtas_ibm_configure_connector);
> >  }
> >  
> >  type_init(core_rtas_register_types)
> 
> -- 
> David Gibson                    | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>                                 | _way_ _around_!
> http://www.ozlabs.org/~dgibson
David Gibson March 3, 2015, 5:33 a.m. UTC | #3
On Mon, Mar 02, 2015 at 10:40:16PM -0600, Michael Roth wrote:
> Quoting David Gibson (2015-03-02 01:02:46)
> > On Thu, Feb 26, 2015 at 09:11:07PM -0600, Michael Roth wrote:
> > > This interface is used to fetch an OF device-tree nodes that describes a
> > > newly-attached device to guest. It is called multiple times to walk the
> > > device-tree node and fetch individual properties into a 'workarea'/buffer
> > > provided by the guest.
> > > 
> > > The device-tree is generated by QEMU and passed to an sPAPRDRConnector during
> > > the initial hotplug operation, and the state of these RTAS calls is tracked by
> > > the sPAPRDRConnector. When the last of these properties is successfully
> > > fetched, we report as special return value to the guest and transition
> > > the device to a 'configured' state on the QEMU/DRC side.
> > > 
> > > See docs/specs/ppc-spapr-hotplug.txt for a complete description of
> > > this interface.
> > > 
> > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > 
> > 
> > So, actually, here's probably the best place to explain what I had in
> > mind for changing the internal interface for this stuff.  I was
> > thinking something like this pseudocode:
> > 
> > struct DRCCCState {
> >         void *fdt;
> >         int offset;
> >         int depth;
> > };
> > 
> > rtas_configure_connector()
> > {
> >         ...
> >         DRCCCState *ccstate;
> >         ...
> > 
> >         /* check parameters, retrieve drc */
> >         ccstate = drc->ccstate;
> > 
> >         if (!ccstate) {
> >                 /* Haven't started configuring yet */
> >                 ccstate = malloc(...);
> >                 /* Retrieve the dt fragment from the backend */
> >                 ccstate->fdt = drck->get_dt(...);
> >                 ccstate->offset = 0;
> >         }
> > 
> >         while (get next tag from fdt) {
> >                 switch (tag)
> >                 case FDT_PROPERTY:
> >                         /* Translate property into rtas return values */
> >                         return SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
> > 
> >                 /* other cases ... */
> >         }
> >         
> >         /* Fall through only if we've completed streaming out the dt
> >         */
> > 
> >          /* Tell the back end we've finished configuring */
> >         drck->cc_completed(...);
> >         return SPAPR_DR_CC_RESPONSE_SUCCESS;
> > }
> > 
> > On reset, or anything else which interrupts the configuration process,
> > just blow away drc->ccstate.
> 
> Ok, that seems reasonable. I took a stab at it here:
> 
>     https://github.com/mdroth/qemu/commit/79ce372743da1b63a6fa33e3de1f1daba8ea1fdc
>     https://github.com/mdroth/qemu/commits/spapr-hotplug-pci

It's looking pretty close now, thanks for the rework.

> It exposes the ccstate as you suggested, via drck->get_cc_state(), and in
> place of drck->cc_completed() I have drck->set_configured() which serves
> roughly the same purpose I think. I opted not to let RTAS handle
> allocation, since it seemed to imply RTAS owns it and not the DRC.

So, that was intentional; basically RTAS *does* own the CCstate.  But
for convenience of index we need connect it to the DRC.  Think of it
like an rtas_priv field in the DRC.

In particular I think the CCstate should be opaque to everything
except the RTAS code itself, which means initializing the offset and
depth in RTAS, not in a drck callback.  As far as the drck callback
is concerned, it's supplying a dt fragment, but it doesn't care about
the details of how the upper layer communicates that through to the
guest.
Michael Roth March 4, 2015, 5:50 a.m. UTC | #4
Quoting David Gibson (2015-03-02 23:33:39)
> On Mon, Mar 02, 2015 at 10:40:16PM -0600, Michael Roth wrote:
> > Quoting David Gibson (2015-03-02 01:02:46)
> > > On Thu, Feb 26, 2015 at 09:11:07PM -0600, Michael Roth wrote:
> > > > This interface is used to fetch an OF device-tree nodes that describes a
> > > > newly-attached device to guest. It is called multiple times to walk the
> > > > device-tree node and fetch individual properties into a 'workarea'/buffer
> > > > provided by the guest.
> > > > 
> > > > The device-tree is generated by QEMU and passed to an sPAPRDRConnector during
> > > > the initial hotplug operation, and the state of these RTAS calls is tracked by
> > > > the sPAPRDRConnector. When the last of these properties is successfully
> > > > fetched, we report as special return value to the guest and transition
> > > > the device to a 'configured' state on the QEMU/DRC side.
> > > > 
> > > > See docs/specs/ppc-spapr-hotplug.txt for a complete description of
> > > > this interface.
> > > > 
> > > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > 
> > > 
> > > So, actually, here's probably the best place to explain what I had in
> > > mind for changing the internal interface for this stuff.  I was
> > > thinking something like this pseudocode:
> > > 
> > > struct DRCCCState {
> > >         void *fdt;
> > >         int offset;
> > >         int depth;
> > > };
> > > 
> > > rtas_configure_connector()
> > > {
> > >         ...
> > >         DRCCCState *ccstate;
> > >         ...
> > > 
> > >         /* check parameters, retrieve drc */
> > >         ccstate = drc->ccstate;
> > > 
> > >         if (!ccstate) {
> > >                 /* Haven't started configuring yet */
> > >                 ccstate = malloc(...);
> > >                 /* Retrieve the dt fragment from the backend */
> > >                 ccstate->fdt = drck->get_dt(...);
> > >                 ccstate->offset = 0;
> > >         }
> > > 
> > >         while (get next tag from fdt) {
> > >                 switch (tag)
> > >                 case FDT_PROPERTY:
> > >                         /* Translate property into rtas return values */
> > >                         return SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
> > > 
> > >                 /* other cases ... */
> > >         }
> > >         
> > >         /* Fall through only if we've completed streaming out the dt
> > >         */
> > > 
> > >          /* Tell the back end we've finished configuring */
> > >         drck->cc_completed(...);
> > >         return SPAPR_DR_CC_RESPONSE_SUCCESS;
> > > }
> > > 
> > > On reset, or anything else which interrupts the configuration process,
> > > just blow away drc->ccstate.
> > 
> > Ok, that seems reasonable. I took a stab at it here:
> > 
> >     https://github.com/mdroth/qemu/commit/79ce372743da1b63a6fa33e3de1f1daba8ea1fdc
> >     https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> 
> It's looking pretty close now, thanks for the rework.
> 
> > It exposes the ccstate as you suggested, via drck->get_cc_state(), and in
> > place of drck->cc_completed() I have drck->set_configured() which serves
> > roughly the same purpose I think. I opted not to let RTAS handle
> > allocation, since it seemed to imply RTAS owns it and not the DRC.
> 
> So, that was intentional; basically RTAS *does* own the CCstate.  But
> for convenience of index we need connect it to the DRC.  Think of it
> like an rtas_priv field in the DRC.
> 
> In particular I think the CCstate should be opaque to everything
> except the RTAS code itself, which means initializing the offset and
> depth in RTAS, not in a drck callback.  As far as the drck callback
> is concerned, it's supplying a dt fragment, but it doesn't care about
> the details of how the upper layer communicates that through to the
> guest.

Ah ok, so it was about moving the CCState out of DRC, and not just the
awkward interface that wraps FDT traversal. So I went ahead and did it
as you suggested, but also making it actually opaque, and relying on
a couple callbacks that configure-connector passes to
drc->begin_configure_connector to handle init/reset of the CCState
fields (such as the fdt, and the start offset (which isn't necessarilly 0)):

  https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
  https://github.com/mdroth/qemu/commit/732aa10fa2e41951c396373e7df7d31861322531

I think I have all your other comments addressed, so if that looks ok
I'll post v7 soon. Thanks!

> 
> -- 
> David Gibson                    | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>                                 | _way_ _around_!
> http://www.ozlabs.org/~dgibson
Michael Roth March 4, 2015, 1:37 p.m. UTC | #5
Quoting Michael Roth (2015-03-03 23:50:34)
> Quoting David Gibson (2015-03-02 23:33:39)
> > On Mon, Mar 02, 2015 at 10:40:16PM -0600, Michael Roth wrote:
> > > Quoting David Gibson (2015-03-02 01:02:46)
> > > > On Thu, Feb 26, 2015 at 09:11:07PM -0600, Michael Roth wrote:
> > > > > This interface is used to fetch an OF device-tree nodes that describes a
> > > > > newly-attached device to guest. It is called multiple times to walk the
> > > > > device-tree node and fetch individual properties into a 'workarea'/buffer
> > > > > provided by the guest.
> > > > > 
> > > > > The device-tree is generated by QEMU and passed to an sPAPRDRConnector during
> > > > > the initial hotplug operation, and the state of these RTAS calls is tracked by
> > > > > the sPAPRDRConnector. When the last of these properties is successfully
> > > > > fetched, we report as special return value to the guest and transition
> > > > > the device to a 'configured' state on the QEMU/DRC side.
> > > > > 
> > > > > See docs/specs/ppc-spapr-hotplug.txt for a complete description of
> > > > > this interface.
> > > > > 
> > > > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > > 
> > > > 
> > > > So, actually, here's probably the best place to explain what I had in
> > > > mind for changing the internal interface for this stuff.  I was
> > > > thinking something like this pseudocode:
> > > > 
> > > > struct DRCCCState {
> > > >         void *fdt;
> > > >         int offset;
> > > >         int depth;
> > > > };
> > > > 
> > > > rtas_configure_connector()
> > > > {
> > > >         ...
> > > >         DRCCCState *ccstate;
> > > >         ...
> > > > 
> > > >         /* check parameters, retrieve drc */
> > > >         ccstate = drc->ccstate;
> > > > 
> > > >         if (!ccstate) {
> > > >                 /* Haven't started configuring yet */
> > > >                 ccstate = malloc(...);
> > > >                 /* Retrieve the dt fragment from the backend */
> > > >                 ccstate->fdt = drck->get_dt(...);
> > > >                 ccstate->offset = 0;
> > > >         }
> > > > 
> > > >         while (get next tag from fdt) {
> > > >                 switch (tag)
> > > >                 case FDT_PROPERTY:
> > > >                         /* Translate property into rtas return values */
> > > >                         return SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
> > > > 
> > > >                 /* other cases ... */
> > > >         }
> > > >         
> > > >         /* Fall through only if we've completed streaming out the dt
> > > >         */
> > > > 
> > > >          /* Tell the back end we've finished configuring */
> > > >         drck->cc_completed(...);
> > > >         return SPAPR_DR_CC_RESPONSE_SUCCESS;
> > > > }
> > > > 
> > > > On reset, or anything else which interrupts the configuration process,
> > > > just blow away drc->ccstate.
> > > 
> > > Ok, that seems reasonable. I took a stab at it here:
> > > 
> > >     https://github.com/mdroth/qemu/commit/79ce372743da1b63a6fa33e3de1f1daba8ea1fdc
> > >     https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> > 
> > It's looking pretty close now, thanks for the rework.
> > 
> > > It exposes the ccstate as you suggested, via drck->get_cc_state(), and in
> > > place of drck->cc_completed() I have drck->set_configured() which serves
> > > roughly the same purpose I think. I opted not to let RTAS handle
> > > allocation, since it seemed to imply RTAS owns it and not the DRC.
> > 
> > So, that was intentional; basically RTAS *does* own the CCstate.  But
> > for convenience of index we need connect it to the DRC.  Think of it
> > like an rtas_priv field in the DRC.
> > 
> > In particular I think the CCstate should be opaque to everything
> > except the RTAS code itself, which means initializing the offset and
> > depth in RTAS, not in a drck callback.  As far as the drck callback
> > is concerned, it's supplying a dt fragment, but it doesn't care about
> > the details of how the upper layer communicates that through to the
> > guest.
> 
> Ah ok, so it was about moving the CCState out of DRC, and not just the
> awkward interface that wraps FDT traversal. So I went ahead and did it
> as you suggested, but also making it actually opaque, and relying on
> a couple callbacks that configure-connector passes to
> drc->begin_configure_connector to handle init/reset of the CCState
> fields (such as the fdt, and the start offset (which isn't necessarilly 0)):
> 
>   https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
>   https://github.com/mdroth/qemu/commit/732aa10fa2e41951c396373e7df7d31861322531
> 
> I think I have all your other comments addressed, so if that looks ok
> I'll post v7 soon. Thanks!

Yikes, just noticed a use-after-free in the new code. Fixed here:

  https://github.com/mdroth/qemu/commit/3fd03f649dc5cd34aa6e2544d38855dd0f8b3708

> 
> > 
> > -- 
> > David Gibson                    | I'll have my music baroque, and my code
> > david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
> >                                 | _way_ _around_!
> > http://www.ozlabs.org/~dgibson
David Gibson March 5, 2015, 4:30 a.m. UTC | #6
On Wed, Mar 04, 2015 at 07:37:08AM -0600, Michael Roth wrote:
> Quoting Michael Roth (2015-03-03 23:50:34)
> > Quoting David Gibson (2015-03-02 23:33:39)
> > > On Mon, Mar 02, 2015 at 10:40:16PM -0600, Michael Roth wrote:
> > > > Quoting David Gibson (2015-03-02 01:02:46)
> > > > > On Thu, Feb 26, 2015 at 09:11:07PM -0600, Michael Roth wrote:
> > > > > > This interface is used to fetch an OF device-tree nodes that describes a
> > > > > > newly-attached device to guest. It is called multiple times to walk the
> > > > > > device-tree node and fetch individual properties into a 'workarea'/buffer
> > > > > > provided by the guest.
> > > > > > 
> > > > > > The device-tree is generated by QEMU and passed to an sPAPRDRConnector during
> > > > > > the initial hotplug operation, and the state of these RTAS calls is tracked by
> > > > > > the sPAPRDRConnector. When the last of these properties is successfully
> > > > > > fetched, we report as special return value to the guest and transition
> > > > > > the device to a 'configured' state on the QEMU/DRC side.
> > > > > > 
> > > > > > See docs/specs/ppc-spapr-hotplug.txt for a complete description of
> > > > > > this interface.
> > > > > > 
> > > > > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > > > 
> > > > > 
> > > > > So, actually, here's probably the best place to explain what I had in
> > > > > mind for changing the internal interface for this stuff.  I was
> > > > > thinking something like this pseudocode:
> > > > > 
> > > > > struct DRCCCState {
> > > > >         void *fdt;
> > > > >         int offset;
> > > > >         int depth;
> > > > > };
> > > > > 
> > > > > rtas_configure_connector()
> > > > > {
> > > > >         ...
> > > > >         DRCCCState *ccstate;
> > > > >         ...
> > > > > 
> > > > >         /* check parameters, retrieve drc */
> > > > >         ccstate = drc->ccstate;
> > > > > 
> > > > >         if (!ccstate) {
> > > > >                 /* Haven't started configuring yet */
> > > > >                 ccstate = malloc(...);
> > > > >                 /* Retrieve the dt fragment from the backend */
> > > > >                 ccstate->fdt = drck->get_dt(...);
> > > > >                 ccstate->offset = 0;
> > > > >         }
> > > > > 
> > > > >         while (get next tag from fdt) {
> > > > >                 switch (tag)
> > > > >                 case FDT_PROPERTY:
> > > > >                         /* Translate property into rtas return values */
> > > > >                         return SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
> > > > > 
> > > > >                 /* other cases ... */
> > > > >         }
> > > > >         
> > > > >         /* Fall through only if we've completed streaming out the dt
> > > > >         */
> > > > > 
> > > > >          /* Tell the back end we've finished configuring */
> > > > >         drck->cc_completed(...);
> > > > >         return SPAPR_DR_CC_RESPONSE_SUCCESS;
> > > > > }
> > > > > 
> > > > > On reset, or anything else which interrupts the configuration process,
> > > > > just blow away drc->ccstate.
> > > > 
> > > > Ok, that seems reasonable. I took a stab at it here:
> > > > 
> > > >     https://github.com/mdroth/qemu/commit/79ce372743da1b63a6fa33e3de1f1daba8ea1fdc
> > > >     https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> > > 
> > > It's looking pretty close now, thanks for the rework.
> > > 
> > > > It exposes the ccstate as you suggested, via drck->get_cc_state(), and in
> > > > place of drck->cc_completed() I have drck->set_configured() which serves
> > > > roughly the same purpose I think. I opted not to let RTAS handle
> > > > allocation, since it seemed to imply RTAS owns it and not the DRC.
> > > 
> > > So, that was intentional; basically RTAS *does* own the CCstate.  But
> > > for convenience of index we need connect it to the DRC.  Think of it
> > > like an rtas_priv field in the DRC.
> > > 
> > > In particular I think the CCstate should be opaque to everything
> > > except the RTAS code itself, which means initializing the offset and
> > > depth in RTAS, not in a drck callback.  As far as the drck callback
> > > is concerned, it's supplying a dt fragment, but it doesn't care about
> > > the details of how the upper layer communicates that through to the
> > > guest.
> > 
> > Ah ok, so it was about moving the CCState out of DRC, and not just the
> > awkward interface that wraps FDT traversal. So I went ahead and did it
> > as you suggested, but also making it actually opaque, and relying on
> > a couple callbacks that configure-connector passes to
> > drc->begin_configure_connector to handle init/reset of the CCState
> > fields (such as the fdt, and the start offset (which isn't necessarilly 0)):
> > 
> >   https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> >   https://github.com/mdroth/qemu/commit/732aa10fa2e41951c396373e7df7d31861322531
> > 
> > I think I have all your other comments addressed, so if that looks ok
> > I'll post v7 soon. Thanks!
> 
> Yikes, just noticed a use-after-free in the new code. Fixed here:
> 
>   https://github.com/mdroth/qemu/commit/3fd03f649dc5cd34aa6e2544d38855dd0f8b3708

Ok, I'm now getting myself a bit tangled in the various revisions.
However looking at

https://github.com/mdroth/qemu/commit/732aa10fa2e41951c396373e7df7d31861322531

The ->begin_configure_connector stuff seems unnecessarily
complicated.  Couldn't you just have begin_configure_connector()
return the fdt, then initialize ccs in rtas_ibm_configure_connector()
itself, avoiding the callback-from-a-callback.

I'm also not sure that reset_ccs is worth abstracting.  I think it
would be reasonable just to say that freeing and setting to NULL the
ccs link is sufficient.

That said, the current reset_ccs doesn't appear to be quite right,
since it frees the ccs structure, but not the fdt fragment it points
to.  I'm not sure how awkward it would be to force them into a common
allocation to avoid that.
Michael Roth March 5, 2015, 2:12 p.m. UTC | #7
Quoting David Gibson (2015-03-04 22:30:40)
> On Wed, Mar 04, 2015 at 07:37:08AM -0600, Michael Roth wrote:
> > Quoting Michael Roth (2015-03-03 23:50:34)
> > > Quoting David Gibson (2015-03-02 23:33:39)
> > > > On Mon, Mar 02, 2015 at 10:40:16PM -0600, Michael Roth wrote:
> > > > > Quoting David Gibson (2015-03-02 01:02:46)
> > > > > > On Thu, Feb 26, 2015 at 09:11:07PM -0600, Michael Roth wrote:
> > > > > > > This interface is used to fetch an OF device-tree nodes that describes a
> > > > > > > newly-attached device to guest. It is called multiple times to walk the
> > > > > > > device-tree node and fetch individual properties into a 'workarea'/buffer
> > > > > > > provided by the guest.
> > > > > > > 
> > > > > > > The device-tree is generated by QEMU and passed to an sPAPRDRConnector during
> > > > > > > the initial hotplug operation, and the state of these RTAS calls is tracked by
> > > > > > > the sPAPRDRConnector. When the last of these properties is successfully
> > > > > > > fetched, we report as special return value to the guest and transition
> > > > > > > the device to a 'configured' state on the QEMU/DRC side.
> > > > > > > 
> > > > > > > See docs/specs/ppc-spapr-hotplug.txt for a complete description of
> > > > > > > this interface.
> > > > > > > 
> > > > > > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > > > > 
> > > > > > 
> > > > > > So, actually, here's probably the best place to explain what I had in
> > > > > > mind for changing the internal interface for this stuff.  I was
> > > > > > thinking something like this pseudocode:
> > > > > > 
> > > > > > struct DRCCCState {
> > > > > >         void *fdt;
> > > > > >         int offset;
> > > > > >         int depth;
> > > > > > };
> > > > > > 
> > > > > > rtas_configure_connector()
> > > > > > {
> > > > > >         ...
> > > > > >         DRCCCState *ccstate;
> > > > > >         ...
> > > > > > 
> > > > > >         /* check parameters, retrieve drc */
> > > > > >         ccstate = drc->ccstate;
> > > > > > 
> > > > > >         if (!ccstate) {
> > > > > >                 /* Haven't started configuring yet */
> > > > > >                 ccstate = malloc(...);
> > > > > >                 /* Retrieve the dt fragment from the backend */
> > > > > >                 ccstate->fdt = drck->get_dt(...);
> > > > > >                 ccstate->offset = 0;
> > > > > >         }
> > > > > > 
> > > > > >         while (get next tag from fdt) {
> > > > > >                 switch (tag)
> > > > > >                 case FDT_PROPERTY:
> > > > > >                         /* Translate property into rtas return values */
> > > > > >                         return SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
> > > > > > 
> > > > > >                 /* other cases ... */
> > > > > >         }
> > > > > >         
> > > > > >         /* Fall through only if we've completed streaming out the dt
> > > > > >         */
> > > > > > 
> > > > > >          /* Tell the back end we've finished configuring */
> > > > > >         drck->cc_completed(...);
> > > > > >         return SPAPR_DR_CC_RESPONSE_SUCCESS;
> > > > > > }
> > > > > > 
> > > > > > On reset, or anything else which interrupts the configuration process,
> > > > > > just blow away drc->ccstate.
> > > > > 
> > > > > Ok, that seems reasonable. I took a stab at it here:
> > > > > 
> > > > >     https://github.com/mdroth/qemu/commit/79ce372743da1b63a6fa33e3de1f1daba8ea1fdc
> > > > >     https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> > > > 
> > > > It's looking pretty close now, thanks for the rework.
> > > > 
> > > > > It exposes the ccstate as you suggested, via drck->get_cc_state(), and in
> > > > > place of drck->cc_completed() I have drck->set_configured() which serves
> > > > > roughly the same purpose I think. I opted not to let RTAS handle
> > > > > allocation, since it seemed to imply RTAS owns it and not the DRC.
> > > > 
> > > > So, that was intentional; basically RTAS *does* own the CCstate.  But
> > > > for convenience of index we need connect it to the DRC.  Think of it
> > > > like an rtas_priv field in the DRC.
> > > > 
> > > > In particular I think the CCstate should be opaque to everything
> > > > except the RTAS code itself, which means initializing the offset and
> > > > depth in RTAS, not in a drck callback.  As far as the drck callback
> > > > is concerned, it's supplying a dt fragment, but it doesn't care about
> > > > the details of how the upper layer communicates that through to the
> > > > guest.
> > > 
> > > Ah ok, so it was about moving the CCState out of DRC, and not just the
> > > awkward interface that wraps FDT traversal. So I went ahead and did it
> > > as you suggested, but also making it actually opaque, and relying on
> > > a couple callbacks that configure-connector passes to
> > > drc->begin_configure_connector to handle init/reset of the CCState
> > > fields (such as the fdt, and the start offset (which isn't necessarilly 0)):
> > > 
> > >   https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> > >   https://github.com/mdroth/qemu/commit/732aa10fa2e41951c396373e7df7d31861322531
> > > 
> > > I think I have all your other comments addressed, so if that looks ok
> > > I'll post v7 soon. Thanks!
> > 
> > Yikes, just noticed a use-after-free in the new code. Fixed here:
> > 
> >   https://github.com/mdroth/qemu/commit/3fd03f649dc5cd34aa6e2544d38855dd0f8b3708
> 
> Ok, I'm now getting myself a bit tangled in the various revisions.
> However looking at
> 
> https://github.com/mdroth/qemu/commit/732aa10fa2e41951c396373e7df7d31861322531
> 
> The ->begin_configure_connector stuff seems unnecessarily
> complicated.  Couldn't you just have begin_configure_connector()
> return the fdt, then initialize ccs in rtas_ibm_configure_connector()
> itself, avoiding the callback-from-a-callback.

We need the fdt, as well as the fdt starting offset, to initialize the CCS.
I think it's a matter a of taste whether that's those are returned separately,
or through a callback passed via begin_configure_connector. The approach I
took just seemed a bit more instructive about what data was needed,
and why. drck->get_fdt() and drck->get_fdt_starting_offset() instead of the
callback seemed a bit much too specific in purpose to warrant a general
interface, and it since we seem to need a reset_ccs anyway (see below),
init_ccs seemed like a good place to contain those values.

I am fine with just initializing ccs via get_fdt()/get_fdt_starting_offset()
beforehand though, but I do think we're stuck with a reset_ccs callback
if we're agreed on drck->get_configure_connector_state() == NULL being
the primary means to invalidate CCS state.

> 
> I'm also not sure that reset_ccs is worth abstracting.  I think it
> would be reasonable just to say that freeing and setting to NULL the
> ccs link is sufficient.

But after allocation, rtas_configure_connector hands over the ccs link
to DRC, and it's local copy goes out of scope. The only way to retrieve
it is via get_configure_connector_state(), so if the idea is to return
NULL open reset, we have no way to free the ccs structure. If we simply
have DRC free it, we violate the idea that ccs state is opaque. So given
the init_ccs callback above, it made sense to handle the free via a
reset_ccs.

> 
> That said, the current reset_ccs doesn't appear to be quite right,
> since it frees the ccs structure, but not the fdt fragment it points
> to.  I'm not sure how awkward it would be to force them into a common
> allocation to avoid that.

You mean freeing the actual FDT data? In this case the FDT pointer is
simply a pointer to the copy the DRC has, and the lifecycle of the FDT
is tied to the device lifecycle, and spans beyond that of a CCS (since
we can configure/unconfigure the same device multiple times without
unplugging in between)

> 
> -- 
> David Gibson                    | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>                                 | _way_ _around_!
> http://www.ozlabs.org/~dgibson
David Gibson March 12, 2015, 5:52 a.m. UTC | #8
On Thu, Mar 05, 2015 at 08:12:58AM -0600, Michael Roth wrote:
> Quoting David Gibson (2015-03-04 22:30:40)
> > On Wed, Mar 04, 2015 at 07:37:08AM -0600, Michael Roth wrote:
> > > Quoting Michael Roth (2015-03-03 23:50:34)
> > > > Quoting David Gibson (2015-03-02 23:33:39)
> > > > > On Mon, Mar 02, 2015 at 10:40:16PM -0600, Michael Roth wrote:
> > > > > > Quoting David Gibson (2015-03-02 01:02:46)
> > > > > > > On Thu, Feb 26, 2015 at 09:11:07PM -0600, Michael Roth wrote:
> > > > > > > > This interface is used to fetch an OF device-tree nodes that describes a
> > > > > > > > newly-attached device to guest. It is called multiple times to walk the
> > > > > > > > device-tree node and fetch individual properties into a 'workarea'/buffer
> > > > > > > > provided by the guest.
> > > > > > > > 
> > > > > > > > The device-tree is generated by QEMU and passed to an sPAPRDRConnector during
> > > > > > > > the initial hotplug operation, and the state of these RTAS calls is tracked by
> > > > > > > > the sPAPRDRConnector. When the last of these properties is successfully
> > > > > > > > fetched, we report as special return value to the guest and transition
> > > > > > > > the device to a 'configured' state on the QEMU/DRC side.
> > > > > > > > 
> > > > > > > > See docs/specs/ppc-spapr-hotplug.txt for a complete description of
> > > > > > > > this interface.
> > > > > > > > 
> > > > > > > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > > > > > 
> > > > > > > 
> > > > > > > So, actually, here's probably the best place to explain what I had in
> > > > > > > mind for changing the internal interface for this stuff.  I was
> > > > > > > thinking something like this pseudocode:
> > > > > > > 
> > > > > > > struct DRCCCState {
> > > > > > >         void *fdt;
> > > > > > >         int offset;
> > > > > > >         int depth;
> > > > > > > };
> > > > > > > 
> > > > > > > rtas_configure_connector()
> > > > > > > {
> > > > > > >         ...
> > > > > > >         DRCCCState *ccstate;
> > > > > > >         ...
> > > > > > > 
> > > > > > >         /* check parameters, retrieve drc */
> > > > > > >         ccstate = drc->ccstate;
> > > > > > > 
> > > > > > >         if (!ccstate) {
> > > > > > >                 /* Haven't started configuring yet */
> > > > > > >                 ccstate = malloc(...);
> > > > > > >                 /* Retrieve the dt fragment from the backend */
> > > > > > >                 ccstate->fdt = drck->get_dt(...);
> > > > > > >                 ccstate->offset = 0;
> > > > > > >         }
> > > > > > > 
> > > > > > >         while (get next tag from fdt) {
> > > > > > >                 switch (tag)
> > > > > > >                 case FDT_PROPERTY:
> > > > > > >                         /* Translate property into rtas return values */
> > > > > > >                         return SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
> > > > > > > 
> > > > > > >                 /* other cases ... */
> > > > > > >         }
> > > > > > >         
> > > > > > >         /* Fall through only if we've completed streaming out the dt
> > > > > > >         */
> > > > > > > 
> > > > > > >          /* Tell the back end we've finished configuring */
> > > > > > >         drck->cc_completed(...);
> > > > > > >         return SPAPR_DR_CC_RESPONSE_SUCCESS;
> > > > > > > }
> > > > > > > 
> > > > > > > On reset, or anything else which interrupts the configuration process,
> > > > > > > just blow away drc->ccstate.
> > > > > > 
> > > > > > Ok, that seems reasonable. I took a stab at it here:
> > > > > > 
> > > > > >     https://github.com/mdroth/qemu/commit/79ce372743da1b63a6fa33e3de1f1daba8ea1fdc
> > > > > >     https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> > > > > 
> > > > > It's looking pretty close now, thanks for the rework.
> > > > > 
> > > > > > It exposes the ccstate as you suggested, via drck->get_cc_state(), and in
> > > > > > place of drck->cc_completed() I have drck->set_configured() which serves
> > > > > > roughly the same purpose I think. I opted not to let RTAS handle
> > > > > > allocation, since it seemed to imply RTAS owns it and not the DRC.
> > > > > 
> > > > > So, that was intentional; basically RTAS *does* own the CCstate.  But
> > > > > for convenience of index we need connect it to the DRC.  Think of it
> > > > > like an rtas_priv field in the DRC.
> > > > > 
> > > > > In particular I think the CCstate should be opaque to everything
> > > > > except the RTAS code itself, which means initializing the offset and
> > > > > depth in RTAS, not in a drck callback.  As far as the drck callback
> > > > > is concerned, it's supplying a dt fragment, but it doesn't care about
> > > > > the details of how the upper layer communicates that through to the
> > > > > guest.
> > > > 
> > > > Ah ok, so it was about moving the CCState out of DRC, and not just the
> > > > awkward interface that wraps FDT traversal. So I went ahead and did it
> > > > as you suggested, but also making it actually opaque, and relying on
> > > > a couple callbacks that configure-connector passes to
> > > > drc->begin_configure_connector to handle init/reset of the CCState
> > > > fields (such as the fdt, and the start offset (which isn't necessarilly 0)):
> > > > 
> > > >   https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> > > >   https://github.com/mdroth/qemu/commit/732aa10fa2e41951c396373e7df7d31861322531
> > > > 
> > > > I think I have all your other comments addressed, so if that looks ok
> > > > I'll post v7 soon. Thanks!
> > > 
> > > Yikes, just noticed a use-after-free in the new code. Fixed here:
> > > 
> > >   https://github.com/mdroth/qemu/commit/3fd03f649dc5cd34aa6e2544d38855dd0f8b3708
> > 
> > Ok, I'm now getting myself a bit tangled in the various revisions.
> > However looking at
> > 
> > https://github.com/mdroth/qemu/commit/732aa10fa2e41951c396373e7df7d31861322531
> > 
> > The ->begin_configure_connector stuff seems unnecessarily
> > complicated.  Couldn't you just have begin_configure_connector()
> > return the fdt, then initialize ccs in rtas_ibm_configure_connector()
> > itself, avoiding the callback-from-a-callback.
> 
> We need the fdt, as well as the fdt starting offset, to initialize the CCS.

Do you actually have a use-case for a non-zero starting offset? Or
could you simplify by having the individual PCI device always create
its fdt fragment at offset 0.

> I think it's a matter a of taste whether that's those are returned separately,
> or through a callback passed via begin_configure_connector. The approach I
> took just seemed a bit more instructive about what data was needed,
> and why.

> drck->get_fdt() and drck->get_fdt_starting_offset() instead of the
> callback seemed a bit much too specific in purpose to warrant a general
> interface, and it since we seem to need a reset_ccs anyway (see below),
> init_ccs seemed like a good place to contain those values.

Um.. I'm a bit confused by this.  You could return both the fdt
pointert and offset as one call using pointers or a structure return
value without needing to invoke a callback-from-a-callback.

> I am fine with just initializing ccs via get_fdt()/get_fdt_starting_offset()
> beforehand though, but I do think we're stuck with a reset_ccs callback
> if we're agreed on drck->get_configure_connector_state() == NULL being
> the primary means to invalidate CCS state.

Hm.  I'll have to take another look.  I'd really like to keep things
to a single set of callbacks if possible, rather than having both
callbacks and counter-callbacks, or whatever you want to call them.

> > I'm also not sure that reset_ccs is worth abstracting.  I think it
> > would be reasonable just to say that freeing and setting to NULL the
> > ccs link is sufficient.
> 
> But after allocation, rtas_configure_connector hands over the ccs link
> to DRC, and it's local copy goes out of scope. The only way to retrieve
> it is via get_configure_connector_state(), so if the idea is to return
> NULL open reset, we have no way to free the ccs structure. If we simply
> have DRC free it, we violate the idea that ccs state is opaque. So given
> the init_ccs callback above, it made sense to handle the free via a
> reset_ccs.
> 
> > 
> > That said, the current reset_ccs doesn't appear to be quite right,
> > since it frees the ccs structure, but not the fdt fragment it points
> > to.  I'm not sure how awkward it would be to force them into a common
> > allocation to avoid that.
> 
> You mean freeing the actual FDT data? In this case the FDT pointer is
> simply a pointer to the copy the DRC has, and the lifecycle of the FDT
> is tied to the device lifecycle, and spans beyond that of a CCS (since
> we can configure/unconfigure the same device multiple times without
> unplugging in between)

Oh, ok.  Why do you need a copy in ccstate then?  The rtas code has
access to the drc structure as well.
Michael Roth March 17, 2015, 3:31 a.m. UTC | #9
Quoting David Gibson (2015-03-12 00:52:10)
> On Thu, Mar 05, 2015 at 08:12:58AM -0600, Michael Roth wrote:
> > Quoting David Gibson (2015-03-04 22:30:40)
> > > On Wed, Mar 04, 2015 at 07:37:08AM -0600, Michael Roth wrote:
> > > > Quoting Michael Roth (2015-03-03 23:50:34)
> > > > > Quoting David Gibson (2015-03-02 23:33:39)
> > > > > > On Mon, Mar 02, 2015 at 10:40:16PM -0600, Michael Roth wrote:
> > > > > > > Quoting David Gibson (2015-03-02 01:02:46)
> > > > > > > > On Thu, Feb 26, 2015 at 09:11:07PM -0600, Michael Roth wrote:
> > > > > > > > > This interface is used to fetch an OF device-tree nodes that describes a
> > > > > > > > > newly-attached device to guest. It is called multiple times to walk the
> > > > > > > > > device-tree node and fetch individual properties into a 'workarea'/buffer
> > > > > > > > > provided by the guest.
> > > > > > > > > 
> > > > > > > > > The device-tree is generated by QEMU and passed to an sPAPRDRConnector during
> > > > > > > > > the initial hotplug operation, and the state of these RTAS calls is tracked by
> > > > > > > > > the sPAPRDRConnector. When the last of these properties is successfully
> > > > > > > > > fetched, we report as special return value to the guest and transition
> > > > > > > > > the device to a 'configured' state on the QEMU/DRC side.
> > > > > > > > > 
> > > > > > > > > See docs/specs/ppc-spapr-hotplug.txt for a complete description of
> > > > > > > > > this interface.
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > > > > > > 
> > > > > > > > 
> > > > > > > > So, actually, here's probably the best place to explain what I had in
> > > > > > > > mind for changing the internal interface for this stuff.  I was
> > > > > > > > thinking something like this pseudocode:
> > > > > > > > 
> > > > > > > > struct DRCCCState {
> > > > > > > >         void *fdt;
> > > > > > > >         int offset;
> > > > > > > >         int depth;
> > > > > > > > };
> > > > > > > > 
> > > > > > > > rtas_configure_connector()
> > > > > > > > {
> > > > > > > >         ...
> > > > > > > >         DRCCCState *ccstate;
> > > > > > > >         ...
> > > > > > > > 
> > > > > > > >         /* check parameters, retrieve drc */
> > > > > > > >         ccstate = drc->ccstate;
> > > > > > > > 
> > > > > > > >         if (!ccstate) {
> > > > > > > >                 /* Haven't started configuring yet */
> > > > > > > >                 ccstate = malloc(...);
> > > > > > > >                 /* Retrieve the dt fragment from the backend */
> > > > > > > >                 ccstate->fdt = drck->get_dt(...);
> > > > > > > >                 ccstate->offset = 0;
> > > > > > > >         }
> > > > > > > > 
> > > > > > > >         while (get next tag from fdt) {
> > > > > > > >                 switch (tag)
> > > > > > > >                 case FDT_PROPERTY:
> > > > > > > >                         /* Translate property into rtas return values */
> > > > > > > >                         return SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
> > > > > > > > 
> > > > > > > >                 /* other cases ... */
> > > > > > > >         }
> > > > > > > >         
> > > > > > > >         /* Fall through only if we've completed streaming out the dt
> > > > > > > >         */
> > > > > > > > 
> > > > > > > >          /* Tell the back end we've finished configuring */
> > > > > > > >         drck->cc_completed(...);
> > > > > > > >         return SPAPR_DR_CC_RESPONSE_SUCCESS;
> > > > > > > > }
> > > > > > > > 
> > > > > > > > On reset, or anything else which interrupts the configuration process,
> > > > > > > > just blow away drc->ccstate.
> > > > > > > 
> > > > > > > Ok, that seems reasonable. I took a stab at it here:
> > > > > > > 
> > > > > > >     https://github.com/mdroth/qemu/commit/79ce372743da1b63a6fa33e3de1f1daba8ea1fdc
> > > > > > >     https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> > > > > > 
> > > > > > It's looking pretty close now, thanks for the rework.
> > > > > > 
> > > > > > > It exposes the ccstate as you suggested, via drck->get_cc_state(), and in
> > > > > > > place of drck->cc_completed() I have drck->set_configured() which serves
> > > > > > > roughly the same purpose I think. I opted not to let RTAS handle
> > > > > > > allocation, since it seemed to imply RTAS owns it and not the DRC.
> > > > > > 
> > > > > > So, that was intentional; basically RTAS *does* own the CCstate.  But
> > > > > > for convenience of index we need connect it to the DRC.  Think of it
> > > > > > like an rtas_priv field in the DRC.
> > > > > > 
> > > > > > In particular I think the CCstate should be opaque to everything
> > > > > > except the RTAS code itself, which means initializing the offset and
> > > > > > depth in RTAS, not in a drck callback.  As far as the drck callback
> > > > > > is concerned, it's supplying a dt fragment, but it doesn't care about
> > > > > > the details of how the upper layer communicates that through to the
> > > > > > guest.
> > > > > 
> > > > > Ah ok, so it was about moving the CCState out of DRC, and not just the
> > > > > awkward interface that wraps FDT traversal. So I went ahead and did it
> > > > > as you suggested, but also making it actually opaque, and relying on
> > > > > a couple callbacks that configure-connector passes to
> > > > > drc->begin_configure_connector to handle init/reset of the CCState
> > > > > fields (such as the fdt, and the start offset (which isn't necessarilly 0)):
> > > > > 
> > > > >   https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> > > > >   https://github.com/mdroth/qemu/commit/732aa10fa2e41951c396373e7df7d31861322531
> > > > > 
> > > > > I think I have all your other comments addressed, so if that looks ok
> > > > > I'll post v7 soon. Thanks!
> > > > 
> > > > Yikes, just noticed a use-after-free in the new code. Fixed here:
> > > > 
> > > >   https://github.com/mdroth/qemu/commit/3fd03f649dc5cd34aa6e2544d38855dd0f8b3708
> > > 
> > > Ok, I'm now getting myself a bit tangled in the various revisions.
> > > However looking at
> > > 
> > > https://github.com/mdroth/qemu/commit/732aa10fa2e41951c396373e7df7d31861322531
> > > 
> > > The ->begin_configure_connector stuff seems unnecessarily
> > > complicated.  Couldn't you just have begin_configure_connector()
> > > return the fdt, then initialize ccs in rtas_ibm_configure_connector()
> > > itself, avoiding the callback-from-a-callback.
> > 
> > We need the fdt, as well as the fdt starting offset, to initialize the CCS.
> 
> Do you actually have a use-case for a non-zero starting offset? Or
> could you simplify by having the individual PCI device always create
> its fdt fragment at offset 0.

Something as simple as:

offset = fdt_add_subnode(fdt, 0, "pci@2");

Results in offset = 8

I'm not sure exactly why, but I guess a subnode has an inherent offset associated
with it.

I've since found that fdt_offset_ptr() can be used to bake the offset into the fdt
pointer, so RTAS can treat the offset as 0 from that point forward.

I've implemented a drc->get_fdt() using this approach.

> 
> > I think it's a matter a of taste whether that's those are returned separately,
> > or through a callback passed via begin_configure_connector. The approach I
> > took just seemed a bit more instructive about what data was needed,
> > and why.
> 
> > drck->get_fdt() and drck->get_fdt_starting_offset() instead of the
> > callback seemed a bit much too specific in purpose to warrant a general
> > interface, and it since we seem to need a reset_ccs anyway (see below),
> > init_ccs seemed like a good place to contain those values.
> 
> Um.. I'm a bit confused by this.  You could return both the fdt
> pointert and offset as one call using pointers or a structure return
> value without needing to invoke a callback-from-a-callback.

True, a get_fdt() could also take a pointer arg to store the offset, so
that's doable. fdt_offset_ptr() is a bit cleaner though IMO.

> 
> > I am fine with just initializing ccs via get_fdt()/get_fdt_starting_offset()
> > beforehand though, but I do think we're stuck with a reset_ccs callback
> > if we're agreed on drck->get_configure_connector_state() == NULL being
> > the primary means to invalidate CCS state.
> 
> Hm.  I'll have to take another look.  I'd really like to keep things
> to a single set of callbacks if possible, rather than having both
> callbacks and counter-callbacks, or whatever you want to call them.

I ran it by Alex during his IBM visit, and it's seeming like this is
turning out a bit more funky than necessary because we're trying to
combine my approach of relying on the DRC to store the state as an
opaque, while still keeping the state opaque to anything but RTAS.

If we move the configure-connector state to a separate list as you
originally suggested the need for wierd callbacks goes away.

This does bring about my original concerns about having a way for the
DRC to reset the state on 1) configure-connector reset, and 2) system reset

1) can be addressed by making the observation that RTAS does know
when to reset the configure-connector state, via rtas-set-indicator(ISOLATE).
So if we do it that way, RTAS can zap/invalidate a CCS as the same points
DRC would have do it. We leak a little bit of the DRC state-machine,
but it's fairly trivial.

2) can be addressed by registering a separate reset handler that clears the
   CCS list (which I've hung off of sPAPREnvironment, with
   spapr_ccs_{add,remove,reset_hook} to work with the list using drc_index
   as the key)

I've pushed the changes here:

https://github.com/mdroth/qemu/commits/spapr-hotplug-pci

2a6f2b2 *spapr_drc: make prop_get_fdt() standalone
9140fe4 *spapr_drc: add get_fdt() and set_configured()
d242fed *spapr_rtas: ibm,configure-connector, use get_fdt()/set_configured()
984ee1b *spapr_drc: drop the old stuff

let me know if you'd prefer I just submit a v8.

> 
> > > I'm also not sure that reset_ccs is worth abstracting.  I think it
> > > would be reasonable just to say that freeing and setting to NULL the
> > > ccs link is sufficient.
> > 
> > But after allocation, rtas_configure_connector hands over the ccs link
> > to DRC, and it's local copy goes out of scope. The only way to retrieve
> > it is via get_configure_connector_state(), so if the idea is to return
> > NULL open reset, we have no way to free the ccs structure. If we simply
> > have DRC free it, we violate the idea that ccs state is opaque. So given
> > the init_ccs callback above, it made sense to handle the free via a
> > reset_ccs.
> > 
> > > 
> > > That said, the current reset_ccs doesn't appear to be quite right,
> > > since it frees the ccs structure, but not the fdt fragment it points
> > > to.  I'm not sure how awkward it would be to force them into a common
> > > allocation to avoid that.
> > 
> > You mean freeing the actual FDT data? In this case the FDT pointer is
> > simply a pointer to the copy the DRC has, and the lifecycle of the FDT
> > is tied to the device lifecycle, and spans beyond that of a CCS (since
> > we can configure/unconfigure the same device multiple times without
> > unplugging in between)
> 
> Oh, ok.  Why do you need a copy in ccstate then?  The rtas code has
> access to the drc structure as well.

Hmm, true, we don't actually need a copy. It makes sense a little more
sense when using the fdt_offset_ptr() approach to get rid of the offset,
and I think now it makes the separation between DRC and
rtas-configure-connector a bit more complete, but we could still just
call drc->get_fdt() each time. Let me know if that's preferable and I'll
work it in for the next submission.

> 
> -- 
> David Gibson                    | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>                                 | _way_ _around_!
> http://www.ozlabs.org/~dgibson
David Gibson March 23, 2015, 12:48 a.m. UTC | #10
On Mon, Mar 16, 2015 at 10:31:29PM -0500, Michael Roth wrote:
> Quoting David Gibson (2015-03-12 00:52:10)
> > On Thu, Mar 05, 2015 at 08:12:58AM -0600, Michael Roth wrote:
> > > Quoting David Gibson (2015-03-04 22:30:40)
> > > > On Wed, Mar 04, 2015 at 07:37:08AM -0600, Michael Roth wrote:
> > > > > Quoting Michael Roth (2015-03-03 23:50:34)
> > > > > > Quoting David Gibson (2015-03-02 23:33:39)
> > > > > > > On Mon, Mar 02, 2015 at 10:40:16PM -0600, Michael Roth wrote:
> > > > > > > > Quoting David Gibson (2015-03-02 01:02:46)
> > > > > > > > > On Thu, Feb 26, 2015 at 09:11:07PM -0600, Michael Roth wrote:
> > > > > > > > > > This interface is used to fetch an OF device-tree nodes that describes a
> > > > > > > > > > newly-attached device to guest. It is called multiple times to walk the
> > > > > > > > > > device-tree node and fetch individual properties into a 'workarea'/buffer
> > > > > > > > > > provided by the guest.
> > > > > > > > > > 
> > > > > > > > > > The device-tree is generated by QEMU and passed to an sPAPRDRConnector during
> > > > > > > > > > the initial hotplug operation, and the state of these RTAS calls is tracked by
> > > > > > > > > > the sPAPRDRConnector. When the last of these properties is successfully
> > > > > > > > > > fetched, we report as special return value to the guest and transition
> > > > > > > > > > the device to a 'configured' state on the QEMU/DRC side.
> > > > > > > > > > 
> > > > > > > > > > See docs/specs/ppc-spapr-hotplug.txt for a complete description of
> > > > > > > > > > this interface.
> > > > > > > > > > 
> > > > > > > > > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > > > > > > > > 
> > > > > > > > > 
> > > > > > > > > So, actually, here's probably the best place to explain what I had in
> > > > > > > > > mind for changing the internal interface for this stuff.  I was
> > > > > > > > > thinking something like this pseudocode:
> > > > > > > > > 
> > > > > > > > > struct DRCCCState {
> > > > > > > > >         void *fdt;
> > > > > > > > >         int offset;
> > > > > > > > >         int depth;
> > > > > > > > > };
> > > > > > > > > 
> > > > > > > > > rtas_configure_connector()
> > > > > > > > > {
> > > > > > > > >         ...
> > > > > > > > >         DRCCCState *ccstate;
> > > > > > > > >         ...
> > > > > > > > > 
> > > > > > > > >         /* check parameters, retrieve drc */
> > > > > > > > >         ccstate = drc->ccstate;
> > > > > > > > > 
> > > > > > > > >         if (!ccstate) {
> > > > > > > > >                 /* Haven't started configuring yet */
> > > > > > > > >                 ccstate = malloc(...);
> > > > > > > > >                 /* Retrieve the dt fragment from the backend */
> > > > > > > > >                 ccstate->fdt = drck->get_dt(...);
> > > > > > > > >                 ccstate->offset = 0;
> > > > > > > > >         }
> > > > > > > > > 
> > > > > > > > >         while (get next tag from fdt) {
> > > > > > > > >                 switch (tag)
> > > > > > > > >                 case FDT_PROPERTY:
> > > > > > > > >                         /* Translate property into rtas return values */
> > > > > > > > >                         return SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
> > > > > > > > > 
> > > > > > > > >                 /* other cases ... */
> > > > > > > > >         }
> > > > > > > > >         
> > > > > > > > >         /* Fall through only if we've completed streaming out the dt
> > > > > > > > >         */
> > > > > > > > > 
> > > > > > > > >          /* Tell the back end we've finished configuring */
> > > > > > > > >         drck->cc_completed(...);
> > > > > > > > >         return SPAPR_DR_CC_RESPONSE_SUCCESS;
> > > > > > > > > }
> > > > > > > > > 
> > > > > > > > > On reset, or anything else which interrupts the configuration process,
> > > > > > > > > just blow away drc->ccstate.
> > > > > > > > 
> > > > > > > > Ok, that seems reasonable. I took a stab at it here:
> > > > > > > > 
> > > > > > > >     https://github.com/mdroth/qemu/commit/79ce372743da1b63a6fa33e3de1f1daba8ea1fdc
> > > > > > > >     https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> > > > > > > 
> > > > > > > It's looking pretty close now, thanks for the rework.
> > > > > > > 
> > > > > > > > It exposes the ccstate as you suggested, via drck->get_cc_state(), and in
> > > > > > > > place of drck->cc_completed() I have drck->set_configured() which serves
> > > > > > > > roughly the same purpose I think. I opted not to let RTAS handle
> > > > > > > > allocation, since it seemed to imply RTAS owns it and not the DRC.
> > > > > > > 
> > > > > > > So, that was intentional; basically RTAS *does* own the CCstate.  But
> > > > > > > for convenience of index we need connect it to the DRC.  Think of it
> > > > > > > like an rtas_priv field in the DRC.
> > > > > > > 
> > > > > > > In particular I think the CCstate should be opaque to everything
> > > > > > > except the RTAS code itself, which means initializing the offset and
> > > > > > > depth in RTAS, not in a drck callback.  As far as the drck callback
> > > > > > > is concerned, it's supplying a dt fragment, but it doesn't care about
> > > > > > > the details of how the upper layer communicates that through to the
> > > > > > > guest.
> > > > > > 
> > > > > > Ah ok, so it was about moving the CCState out of DRC, and not just the
> > > > > > awkward interface that wraps FDT traversal. So I went ahead and did it
> > > > > > as you suggested, but also making it actually opaque, and relying on
> > > > > > a couple callbacks that configure-connector passes to
> > > > > > drc->begin_configure_connector to handle init/reset of the CCState
> > > > > > fields (such as the fdt, and the start offset (which isn't necessarilly 0)):
> > > > > > 
> > > > > >   https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> > > > > >   https://github.com/mdroth/qemu/commit/732aa10fa2e41951c396373e7df7d31861322531
> > > > > > 
> > > > > > I think I have all your other comments addressed, so if that looks ok
> > > > > > I'll post v7 soon. Thanks!
> > > > > 
> > > > > Yikes, just noticed a use-after-free in the new code. Fixed here:
> > > > > 
> > > > >   https://github.com/mdroth/qemu/commit/3fd03f649dc5cd34aa6e2544d38855dd0f8b3708
> > > > 
> > > > Ok, I'm now getting myself a bit tangled in the various revisions.
> > > > However looking at
> > > > 
> > > > https://github.com/mdroth/qemu/commit/732aa10fa2e41951c396373e7df7d31861322531
> > > > 
> > > > The ->begin_configure_connector stuff seems unnecessarily
> > > > complicated.  Couldn't you just have begin_configure_connector()
> > > > return the fdt, then initialize ccs in rtas_ibm_configure_connector()
> > > > itself, avoiding the callback-from-a-callback.
> > > 
> > > We need the fdt, as well as the fdt starting offset, to initialize the CCS.
> > 
> > Do you actually have a use-case for a non-zero starting offset? Or
> > could you simplify by having the individual PCI device always create
> > its fdt fragment at offset 0.
> 
> Something as simple as:
> 
> offset = fdt_add_subnode(fdt, 0, "pci@2");
> 
> Results in offset = 8

Oh, right, of course.  I'd forgotten that your sample node wasn't the
"root" of your fragmentary tree, but instead a subnode of that unused
root.

> I'm not sure exactly why, but I guess a subnode has an inherent offset associated
> with it.

Well, in this case it will be the FDT_NODE_BEGIN for the fragment's
root node at offset 0, the root node's name ("") at offset 4, then we
align to a 4 byte boundary, leaving the next tag, FDT_NODE_BEGIN for
the node you care about at offset 8.

So.. it would be possible to bake the PCI tree fragments with the
actual PCI node as the root node (this sort of case is why we allow a
name on the root node, although it's usually "").  It might be a bit
awkward though - it can't be done with fdt_add_subnode(), only with
fdt_begin_node().

> I've since found that fdt_offset_ptr() can be used to bake the offset into the fdt
> pointer, so RTAS can treat the offset as 0 from that point forward.

Hrm.. I'm not sure quite what you have in mind here; it doesn't really
sound like an intended use case for fdt_offset_ptr() (which is mostly
intended as an internal interface only).

> I've implemented a drc->get_fdt() using this approach.



> 
> > 
> > > I think it's a matter a of taste whether that's those are returned separately,
> > > or through a callback passed via begin_configure_connector. The approach I
> > > took just seemed a bit more instructive about what data was needed,
> > > and why.
> > 
> > > drck->get_fdt() and drck->get_fdt_starting_offset() instead of the
> > > callback seemed a bit much too specific in purpose to warrant a general
> > > interface, and it since we seem to need a reset_ccs anyway (see below),
> > > init_ccs seemed like a good place to contain those values.
> > 
> > Um.. I'm a bit confused by this.  You could return both the fdt
> > pointert and offset as one call using pointers or a structure return
> > value without needing to invoke a callback-from-a-callback.
> 
> True, a get_fdt() could also take a pointer arg to store the offset, so
> that's doable. fdt_offset_ptr() is a bit cleaner though IMO.
> 
> > 
> > > I am fine with just initializing ccs via get_fdt()/get_fdt_starting_offset()
> > > beforehand though, but I do think we're stuck with a reset_ccs callback
> > > if we're agreed on drck->get_configure_connector_state() == NULL being
> > > the primary means to invalidate CCS state.
> > 
> > Hm.  I'll have to take another look.  I'd really like to keep things
> > to a single set of callbacks if possible, rather than having both
> > callbacks and counter-callbacks, or whatever you want to call them.
> 
> I ran it by Alex during his IBM visit, and it's seeming like this is
> turning out a bit more funky than necessary because we're trying to
> combine my approach of relying on the DRC to store the state as an
> opaque, while still keeping the state opaque to anything but RTAS.
> 
> If we move the configure-connector state to a separate list as you
> originally suggested the need for wierd callbacks goes away.
> 
> This does bring about my original concerns about having a way for the
> DRC to reset the state on 1) configure-connector reset, and 2) system reset
> 
> 1) can be addressed by making the observation that RTAS does know
> when to reset the configure-connector state, via rtas-set-indicator(ISOLATE).
> So if we do it that way, RTAS can zap/invalidate a CCS as the same points
> DRC would have do it. We leak a little bit of the DRC state-machine,
> but it's fairly trivial.
> 
> 2) can be addressed by registering a separate reset handler that clears the
>    CCS list (which I've hung off of sPAPREnvironment, with
>    spapr_ccs_{add,remove,reset_hook} to work with the list using drc_index
>    as the key)

Ok.  I'll have to look at the code again and see what it seems like.

> I've pushed the changes here:
> 
> https://github.com/mdroth/qemu/commits/spapr-hotplug-pci
> 
> 2a6f2b2 *spapr_drc: make prop_get_fdt() standalone
> 9140fe4 *spapr_drc: add get_fdt() and set_configured()
> d242fed *spapr_rtas: ibm,configure-connector, use get_fdt()/set_configured()
> 984ee1b *spapr_drc: drop the old stuff
> 
> let me know if you'd prefer I just submit a v8.

If you could push the v8 please.


> 
> > 
> > > > I'm also not sure that reset_ccs is worth abstracting.  I think it
> > > > would be reasonable just to say that freeing and setting to NULL the
> > > > ccs link is sufficient.
> > > 
> > > But after allocation, rtas_configure_connector hands over the ccs link
> > > to DRC, and it's local copy goes out of scope. The only way to retrieve
> > > it is via get_configure_connector_state(), so if the idea is to return
> > > NULL open reset, we have no way to free the ccs structure. If we simply
> > > have DRC free it, we violate the idea that ccs state is opaque. So given
> > > the init_ccs callback above, it made sense to handle the free via a
> > > reset_ccs.
> > > 
> > > > 
> > > > That said, the current reset_ccs doesn't appear to be quite right,
> > > > since it frees the ccs structure, but not the fdt fragment it points
> > > > to.  I'm not sure how awkward it would be to force them into a common
> > > > allocation to avoid that.
> > > 
> > > You mean freeing the actual FDT data? In this case the FDT pointer is
> > > simply a pointer to the copy the DRC has, and the lifecycle of the FDT
> > > is tied to the device lifecycle, and spans beyond that of a CCS (since
> > > we can configure/unconfigure the same device multiple times without
> > > unplugging in between)
> > 
> > Oh, ok.  Why do you need a copy in ccstate then?  The rtas code has
> > access to the drc structure as well.
> 
> Hmm, true, we don't actually need a copy. It makes sense a little more
> sense when using the fdt_offset_ptr() approach to get rid of the offset,
> and I think now it makes the separation between DRC and
> rtas-configure-connector a bit more complete, but we could still just
> call drc->get_fdt() each time. Let me know if that's preferable and I'll
> work it in for the next submission.

I think it's preferable.
diff mbox

Patch

diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index f80beb2..31ad35f 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -418,6 +418,92 @@  static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     rtas_st(rets, 1, entity_sense);
 }
 
+/* configure-connector work area offsets, int32_t units for field
+ * indexes, bytes for field offset/len values.
+ *
+ * as documented by PAPR+ v2.7, 13.5.3.5
+ */
+#define CC_IDX_NODE_NAME_OFFSET 2
+#define CC_IDX_PROP_NAME_OFFSET 2
+#define CC_IDX_PROP_LEN 3
+#define CC_IDX_PROP_DATA_OFFSET 4
+#define CC_VAL_DATA_OFFSET ((CC_IDX_PROP_DATA_OFFSET + 1) * 4)
+#define CC_WA_LEN 4096
+
+static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
+                                         sPAPREnvironment *spapr,
+                                         uint32_t token, uint32_t nargs,
+                                         target_ulong args, uint32_t nret,
+                                         target_ulong rets)
+{
+    uint64_t wa_addr;
+    uint64_t wa_offset;
+    uint32_t drc_index;
+    sPAPRDRConnector *drc;
+    sPAPRDRConnectorClass *drck;
+    sPAPRDRCCResponse resp;
+    const struct fdt_property *prop = NULL;
+    char *prop_name = NULL;
+    int prop_len, rc;
+
+    if (nargs != 2 || nret != 1) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    wa_addr = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 0);
+
+    drc_index = rtas_ld(wa_addr, 0);
+    drc = spapr_dr_connector_by_index(drc_index);
+    if (!drc) {
+        DPRINTF("rtas_ibm_configure_connector: invalid sensor/DRC index: %xh\n",
+                drc_index);
+        rc = RTAS_OUT_PARAM_ERROR;
+        goto out;
+    }
+    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+    resp = drck->configure_connector(drc, &prop_name, &prop, &prop_len);
+
+    switch (resp) {
+    case SPAPR_DR_CC_RESPONSE_NEXT_CHILD:
+        /* provide the name of the next OF node */
+        wa_offset = CC_VAL_DATA_OFFSET;
+        rtas_st(wa_addr, CC_IDX_NODE_NAME_OFFSET, wa_offset);
+        rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset,
+                              (uint8_t *)prop_name, strlen(prop_name) + 1);
+        break;
+    case SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY:
+        /* provide the name of the next OF property */
+        wa_offset = CC_VAL_DATA_OFFSET;
+        rtas_st(wa_addr, CC_IDX_PROP_NAME_OFFSET, wa_offset);
+        rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset,
+                              (uint8_t *)prop_name, strlen(prop_name) + 1);
+
+        /* provide the length and value of the OF property. data gets placed
+         * immediately after NULL terminator of the OF property's name string
+         */
+        wa_offset += strlen(prop_name) + 1,
+        rtas_st(wa_addr, CC_IDX_PROP_LEN, prop_len);
+        rtas_st(wa_addr, CC_IDX_PROP_DATA_OFFSET, wa_offset);
+        rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset,
+                              (uint8_t *)((struct fdt_property *)prop)->data,
+                              prop_len);
+        break;
+    case SPAPR_DR_CC_RESPONSE_PREV_PARENT:
+    case SPAPR_DR_CC_RESPONSE_ERROR:
+    case SPAPR_DR_CC_RESPONSE_SUCCESS:
+        break;
+    default:
+        /* drck->configure_connector() should not return anything else */
+        g_assert(false);
+    }
+
+    rc = resp;
+out:
+    g_free(prop_name);
+    rtas_st(rets, 0, rc);
+}
+
 static struct rtas_call {
     const char *name;
     spapr_rtas_fn fn;
@@ -551,6 +637,8 @@  static void core_rtas_register_types(void)
                         rtas_set_indicator);
     spapr_rtas_register(RTAS_GET_SENSOR_STATE, "get-sensor-state",
                         rtas_get_sensor_state);
+    spapr_rtas_register(RTAS_IBM_CONFIGURE_CONNECTOR, "ibm,configure-connector",
+                        rtas_ibm_configure_connector);
 }
 
 type_init(core_rtas_register_types)