Message ID | 1445853185-22518-1-git-send-email-bharata@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
On 10/26/2015 04:53 AM, Bharata B Rao wrote: > Add support to hot remove pc-dimm memory devices. > > TODO: In response to memory hot removal operation on a DIMM device, > guest kernel might refuse to offline a few LMBs that are part of that device. > In such cases, we will have a DIMM device that has some LMBs online and some > LMBs offline. To avoid this situation, drmgr could be enhanced to support > a command line option that results in removal of all the LMBs or none. > We had discussed an update to drmgr previously that would allow one to specify that they wanted to remove a certain number of LMBs starting at a specified drc index. I would assume we could incorporate the notion that it is an all or nothing request into that. Unfortunately right now drmgr will attempt to remove all of the requested LMBs but it does a best effort. This results in situations where some LMB's are not removed and some are. -Nathan > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > --- > Changes in v1: > - Got rid of the patch that introduced a field in PCDIMMDevice to track > DIMM marked for removal since we can track that using within DRC > object. > - Removed the patch that added return value to rtas_set_indicator() > since the required changes are already pushed by Michael Roth. 
> > v0: > > hw/ppc/spapr.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++- > hw/ppc/spapr_drc.c | 18 +++++++++++ > 2 files changed, 107 insertions(+), 1 deletion(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index e1202ce..f5b1ac2 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -2174,6 +2174,85 @@ out: > error_propagate(errp, local_err); > } > > +typedef struct sPAPRDIMMState { > + uint32_t nr_lmbs; > +} sPAPRDIMMState; > + > +static void spapr_lmb_release(DeviceState *dev, void *opaque) > +{ > + sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque; > + HotplugHandler *hotplug_ctrl = NULL; > + Error *local_err = NULL; > + > + if (--ds->nr_lmbs) { > + return; > + } > + > + g_free(ds); > + > + /* > + * Now that all the LMBs have been removed by the guest, call the > + * pc-dimm unplug handler to cleanup up the pc-dimm device. > + */ > + hotplug_ctrl = qdev_get_hotplug_handler(dev); > + hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); > +} > + > +static void spapr_del_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, > + Error **errp) > +{ > + sPAPRDRConnector *drc; > + sPAPRDRConnectorClass *drck; > + uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE; > + Error *local_err = NULL; > + int i; > + sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState)); > + > + ds->nr_lmbs = nr_lmbs; > + for (i = 0; i < nr_lmbs; i++) { > + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, > + addr/SPAPR_MEMORY_BLOCK_SIZE); > + g_assert(drc); > + > + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > + drck->detach(drc, dev, spapr_lmb_release, ds, &local_err); > + addr += SPAPR_MEMORY_BLOCK_SIZE; > + } > + spapr_hotplug_req_remove_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs); > +} > + > +static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, > + Error **errp) > +{ > + sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); > + PCDIMMDevice *dimm = PC_DIMM(dev); > + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); > + MemoryRegion *mr 
= ddc->get_memory_region(dimm); > + > + pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); > + object_unparent(OBJECT(dev)); > +} > + > +static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, > + DeviceState *dev, Error **errp) > +{ > + Error *local_err = NULL; > + PCDIMMDevice *dimm = PC_DIMM(dev); > + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); > + MemoryRegion *mr = ddc->get_memory_region(dimm); > + uint64_t size = memory_region_size(mr); > + uint64_t addr; > + > + addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); > + if (local_err) { > + goto out; > + } > + > + spapr_del_lmbs(dev, addr, size, &local_err); > +out: > + error_propagate(errp, local_err); > +} > + > static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, > DeviceState *dev, Error **errp) > { > @@ -2221,7 +2300,15 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, > DeviceState *dev, Error **errp) > { > if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { > - error_setg(errp, "Memory hot unplug not supported by sPAPR"); > + spapr_memory_unplug(hotplug_dev, dev, errp); > + } > +} > + > +static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, > + DeviceState *dev, Error **errp) > +{ > + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { > + spapr_memory_unplug_request(hotplug_dev, dev, errp); > } > } > > @@ -2263,6 +2350,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) > hc->plug = spapr_machine_device_plug; > hc->unplug = spapr_machine_device_unplug; > mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; > + hc->unplug_request = spapr_machine_device_unplug_request; > > smc->dr_lmb_enabled = false; > fwc->get_dev_path = spapr_get_fw_dev_path; > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c > index 5d6ea7c..59b6ea9 100644 > --- a/hw/ppc/spapr_drc.c > +++ b/hw/ppc/spapr_drc.c > @@ -11,6 +11,7 @@ > */ > > #include "hw/ppc/spapr_drc.h" > +#include "hw/ppc/spapr.h" > 
#include "qom/object.h" > #include "hw/qdev.h" > #include "qapi/visitor.h" > @@ -77,6 +78,23 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, > } > } > > + /* > + * Fail any request to ISOLATE the LMB DRC if this LMB doesn't > + * belong to a DIMM device that is marked for removal. > + * > + * Currently the guest userspace tool drmgr that drives the memory > + * hotplug/unplug will just try to remove a set of 'removable' LMBs > + * in response to a hot unplug request that is based on drc-count. > + * If the LMB being removed doesn't belong to a DIMM device that is > + * actually being unplugged, fail the isolation request here. > + */ > + if (drc->type == SPAPR_DR_CONNECTOR_TYPE_LMB) { > + if ((state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && > + !drc->awaiting_release) { > + return RTAS_OUT_HW_ERROR; > + } > + } > + > drc->isolation_state = state; > > if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { >
On Tue, Oct 27, 2015 at 02:52:13PM -0500, Nathan Fontenot wrote: > On 10/26/2015 04:53 AM, Bharata B Rao wrote: > > Add support to hot remove pc-dimm memory devices. > > > > TODO: In response to memory hot removal operation on a DIMM device, > > guest kernel might refuse to offline a few LMBs that are part of that device. > > In such cases, we will have a DIMM device that has some LMBs online and some > > LMBs offline. To avoid this situation, drmgr could be enhanced to support > > a command line option that results in removal of all the LMBs or none. > > > > We had discussed an update to drmgr previously that would allow one to specify > that they wanted to remove a certain number of LMBs starting at a specified drc > index. I would assume we could incorporate the notion that it is an all or > nothing request into that. Right, but since that requires PAPR spec changes, I thought for now we could add such an option to drmgr and use that from rtas_errd on PowerKVM to correctly do memory hot unplug. Not feasible ? Regards, Bharata.
On Mon, Oct 26, 2015 at 03:23:05PM +0530, Bharata B Rao wrote: > Add support to hot remove pc-dimm memory devices. Sorry it's taken me so long to look at this. > TODO: In response to memory hot removal operation on a DIMM device, > guest kernel might refuse to offline a few LMBs that are part of that device. > In such cases, we will have a DIMM device that has some LMBs online and some > LMBs offline. To avoid this situation, drmgr could be enhanced to support > a command line option that results in removal of all the LMBs or none. Hm.. what would be the end result of such a situation? We want to handle it as gracefully as we can, even if the guest has old tools. Is there some way we can detect this failure condition, and re-connect the DIMM? It does highlight the fact that the PAPR hotplug interface and the pc-dimm model don't work together terribly well. I think we have to try to support it for the sake of management layers, but I do wonder if we ought to think about an alternative "lmb-pool" backend, where the precise location of memory blocks isn't so important. With some thought such a backend might also be useful for paravirt x86. Which also makes me think, I wonder if it would be possible to wire up a PAPR compatible interface to qemu's balloon backend, since in some ways the PAPR memory hotplug model acts more like a balloon (in that the guest physical address of removed LMBs isn't usually important to the host). Still, we need to get the dimm backed model working first, I guess. Apart from those overall considerations, the patch looks good. > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > --- > Changes in v1: > - Got rid of the patch that introduced a field in PCDIMMDevice to track > DIMM marked for removal since we can track that using within DRC > object. > - Removed the patch that added return value to rtas_set_indicator() > since the required changes are already pushed by Michael Roth. 
> > v0: > > hw/ppc/spapr.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++- > hw/ppc/spapr_drc.c | 18 +++++++++++ > 2 files changed, 107 insertions(+), 1 deletion(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index e1202ce..f5b1ac2 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -2174,6 +2174,85 @@ out: > error_propagate(errp, local_err); > } > > +typedef struct sPAPRDIMMState { > + uint32_t nr_lmbs; > +} sPAPRDIMMState; > + > +static void spapr_lmb_release(DeviceState *dev, void *opaque) > +{ > + sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque; > + HotplugHandler *hotplug_ctrl = NULL; > + Error *local_err = NULL; > + > + if (--ds->nr_lmbs) { > + return; > + } > + > + g_free(ds); > + > + /* > + * Now that all the LMBs have been removed by the guest, call the > + * pc-dimm unplug handler to cleanup up the pc-dimm device. > + */ > + hotplug_ctrl = qdev_get_hotplug_handler(dev); > + hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); > +} > + > +static void spapr_del_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, > + Error **errp) > +{ > + sPAPRDRConnector *drc; > + sPAPRDRConnectorClass *drck; > + uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE; > + Error *local_err = NULL; > + int i; > + sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState)); > + > + ds->nr_lmbs = nr_lmbs; > + for (i = 0; i < nr_lmbs; i++) { > + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, > + addr/SPAPR_MEMORY_BLOCK_SIZE); > + g_assert(drc); > + > + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > + drck->detach(drc, dev, spapr_lmb_release, ds, &local_err); > + addr += SPAPR_MEMORY_BLOCK_SIZE; > + } > + spapr_hotplug_req_remove_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs); > +} > + > +static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, > + Error **errp) > +{ > + sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); > + PCDIMMDevice *dimm = PC_DIMM(dev); > + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); > + MemoryRegion *mr 
= ddc->get_memory_region(dimm); > + > + pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); > + object_unparent(OBJECT(dev)); > +} > + > +static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, > + DeviceState *dev, Error **errp) > +{ > + Error *local_err = NULL; > + PCDIMMDevice *dimm = PC_DIMM(dev); > + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); > + MemoryRegion *mr = ddc->get_memory_region(dimm); > + uint64_t size = memory_region_size(mr); > + uint64_t addr; > + > + addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); > + if (local_err) { > + goto out; > + } > + > + spapr_del_lmbs(dev, addr, size, &local_err); > +out: > + error_propagate(errp, local_err); > +} > + > static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, > DeviceState *dev, Error **errp) > { > @@ -2221,7 +2300,15 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, > DeviceState *dev, Error **errp) > { > if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { > - error_setg(errp, "Memory hot unplug not supported by sPAPR"); > + spapr_memory_unplug(hotplug_dev, dev, errp); > + } > +} > + > +static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, > + DeviceState *dev, Error **errp) > +{ > + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { > + spapr_memory_unplug_request(hotplug_dev, dev, errp); > } > } > > @@ -2263,6 +2350,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) > hc->plug = spapr_machine_device_plug; > hc->unplug = spapr_machine_device_unplug; > mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; > + hc->unplug_request = spapr_machine_device_unplug_request; > > smc->dr_lmb_enabled = false; > fwc->get_dev_path = spapr_get_fw_dev_path; > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c > index 5d6ea7c..59b6ea9 100644 > --- a/hw/ppc/spapr_drc.c > +++ b/hw/ppc/spapr_drc.c > @@ -11,6 +11,7 @@ > */ > > #include "hw/ppc/spapr_drc.h" > +#include "hw/ppc/spapr.h" > 
#include "qom/object.h" > #include "hw/qdev.h" > #include "qapi/visitor.h" > @@ -77,6 +78,23 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, > } > } > > + /* > + * Fail any request to ISOLATE the LMB DRC if this LMB doesn't > + * belong to a DIMM device that is marked for removal. > + * > + * Currently the guest userspace tool drmgr that drives the memory > + * hotplug/unplug will just try to remove a set of 'removable' LMBs > + * in response to a hot unplug request that is based on drc-count. > + * If the LMB being removed doesn't belong to a DIMM device that is > + * actually being unplugged, fail the isolation request here. > + */ > + if (drc->type == SPAPR_DR_CONNECTOR_TYPE_LMB) { > + if ((state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && > + !drc->awaiting_release) { > + return RTAS_OUT_HW_ERROR; > + } > + } > + > drc->isolation_state = state; > > if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) {
On Wed, Nov 11, 2015 at 12:36:30PM +1100, David Gibson wrote: > On Mon, Oct 26, 2015 at 03:23:05PM +0530, Bharata B Rao wrote: > > Add support to hot remove pc-dimm memory devices. > > Sorry it's taken me so long to look at this. > > > TODO: In response to memory hot removal operation on a DIMM device, > > guest kernel might refuse to offline a few LMBs that are part of that device. > > In such cases, we will have a DIMM device that has some LMBs online and some > > LMBs offline. To avoid this situation, drmgr could be enhanced to support > > a command line option that results in removal of all the LMBs or none. I am realizing that it might not be possible to support the above notion of zero or full removal of DIMM unless we have an association of LMBs to DIMMs. drmgr could be handling unplug requests of multiple DIMMs at a time and it is really not possible to determine how many LMBs of which DIMM got off-lined successfully. With this memory unplug patch that I have posted here and with the existing drmgr, this is the behaviour: - If guest kernel refuses to offline some LMBs of the DIMM for which hot unplug request is done, such LMBs will remain online while the rest of the LMBs from the DIMM will be offline. The DIMM device will continue to exist from QEMU point of view and will be removed only when all the LMBs belonging to it are removed. - Since we implement LMB addition/removal by count, during an unplug request, drmgr will walk through all the removable/hotplugged DIMMs and will attempt to unplug the specified number of LMBs. In addition to other things, this involves marking the LMB offline first (via /sys/devices/system/memory/memoryXX/online) and then releasing the DRC which involves isolating the DRC. QEMU will fail isolation request for any LMB DRC that has not been marked for removal via prior DIMM device_del operation. In response to this failure, drmgr will bring the LMB back online by writing to /sys/devices/system/memory/memoryXX/online. 
Thus some LMBs that are unrelated to the current DIMM removal request are unnecessarily made offline only to be brought online immediately. There is another situation possible here. Let DIMM1 have 4 LMBs out of which 2 LMBs became offline in response to unplug request. Now during the removal of DIMM2, there is nothing that prevents those 2 LMBs from DIMM1 from going away (if guest allows). So we have a situation where some LMBs of DIMM1 got offline when running unplug request for DIMM2. Though I have not seen this in practice, I believe it is possible. > > Hm.. what would be the end result of such a situation? We want to > handle it as gracefully as we can, even if the guest has old tools. > Is there some way we can detect this failure condition, and re-connect > the DIMM? > > It does highlight the fact that the PAPR hotplug interface and the > pc-dimm model don't work together terribly well. I think we have to I believe we will have a saner model when PAPR is updated to support removal of specified number of LMBs starting at a particular DRC index. With that in place, we could support pc-dimm model removal correctly. > try to support it for the sake of management layers, but I do wonder > if we ought to thinkg about an alternative "lmb-pool" backend, where > the precise location of memory blocks isn't so important. With some > thought such a backend might also be useful for paravirt x86. > > Which also makes me think, I wonder if it would be possible to wire up > a PAPR compatible interface to qemu's balloon backend, since in some > ways the PAPR memory hotplug model acts more like a balloon (in that > the guest physical address of removed LMBs isn't usually important to > the host). > > Still, we need to get the dimm backed model working first, I guess. > So 1. can we live with the current hotunplug behaviour that I described above for now or 2. should we put pc-dimm based memory hotunplug on backburner until we get PAPR spec changes in place or 3. 
start exploring lmb-pool model? Regards, Bharata.
On Wed, Nov 11, 2015 at 12:50:57PM +0530, Bharata B Rao wrote: > On Wed, Nov 11, 2015 at 12:36:30PM +1100, David Gibson wrote: > > On Mon, Oct 26, 2015 at 03:23:05PM +0530, Bharata B Rao wrote: > > > Add support to hot remove pc-dimm memory devices. > > > > Sorry it's taken me so long to look at this. > > > > > TODO: In response to memory hot removal operation on a DIMM device, > > > guest kernel might refuse to offline a few LMBs that are part of that device. > > > In such cases, we will have a DIMM device that has some LMBs online and some > > > LMBs offline. To avoid this situation, drmgr could be enhanced to support > > > a command line option that results in removal of all the LMBs or none. > > I am realizing that it might not be possible to support the above notion of > zero or full removal of DIMM unless we have an association of LMBs > to DIMMs. Aren't LMBs implicitly associated to DIMMs by physical address? > drmgr could be handling unplug requests of multiple DIMMs > at a time and it is really not possible to determine how many LMBs of > which DIMM got off-lined successfully. I think I'm missing something. So far I can see that that would be a bit fiddly, but I don't see an inherent problem. > With this memory unplug patch that I have posted here and with the existing > drmgr, this is the behaviour: > > - If guest kernel refuses to offline some LMBs of the DIMM for which > hot unplug request is done, such LMBs will remain online while the > rest of the LMBs from the DIMM will be offline. The DIMM device will > continue to exist from QEMU point of view and will be removed only when > all the LMBs belonging to it are removed. Yeah, that's a bit nasty :/ > - Since we implement LMB addition/removal by count, during an unplug request, > drmgr will walk through all the removable/hotplugged DIMMs and will attempt > to unplug the specified number of LMBs. 
In addition to other things, this > involves marking the LMB offline first (via > /sys/devices/system/memory/memoryXX/online) and then releasing the DRC > which involves isolating the DRC. > > QEMU will fail isolation request for any LMB DRC that has not been marked > for removal via prior DIMM device_del operation. In response to this > failure, drmgr will bring the LMB back online by writing to > /sys/devices/system/memory/memoryXX/online. > > Thus some LMBs that are unrelated to the current DIMM removal request > are unnecessarily made offline only to be brought online immediately. "Some". As far as I can tell, with no hint address, you'd expect to go through on average half of the LMBs other than the ones in the right DIMM before you hit the right DIMM. Which means with a reasonably large number of DIMMs you'll churn through nearly half your memory (on average) on every attempted DIMM removal. I suspect "awful" would rather understate the performance impact of this on a well-utilized system. > There is another situation possible here. Let DIMM1 have 4 LMBs out of > which 2 LMBs became offline in response to unplug request. Now during > the removal of DIMM2, there is nothing that prevents those 2 LMBs from > DIMM1 from going away (if guest allows). So we have a situation where > some LMBs of DIMM1 got offline when running unplug request for DIMM2. > Though I have not seen this in practice, I believe it is possible. Ick. Yeah, it seems like we could only really keep this sane with both the pc-dimm and PAPR interfaces if we forced serialization of each DIMM unplug, somehow detected failure of one and rolled it back before attempting another one. > > Hm.. what would be the end result of such a situation? We want to > > handle it as gracefully as we can, even if the guest has old tools. > > Is there some way we can detect this failure condition, and re-connect > > the DIMM? 
> > > > It does highlight the fact that the PAPR hotplug interface and the > > pc-dimm model don't work together terribly well. I think we have to > > I believe we will have a saner model when PAPR is updated to support > removal of specified number of LMBs starting at a particular DRC index. > With that in place, we could support pc-dimm model removal correctly. Right. Do we have any guess at a timeline for this? > > try to support it for the sake of management layers, but I do wonder > > if we ought to thinkg about an alternative "lmb-pool" backend, where > > the precise location of memory blocks isn't so important. With some > > thought such a backend might also be useful for paravirt x86. > > > > Which also makes me think, I wonder if it would be possible to wire up > > a PAPR compatible interface to qemu's balloon backend, since in some > > ways the PAPR memory hotplug model acts more like a balloon (in that > > the guest physical address of removed LMBs isn't usually important to > > the host). > > > > Still, we need to get the dimm backed model working first, I guess. > > > > So > > 1. can we live with the current hotunplug behaviour that I described above > for now or Unless I've missed something to mitigate the memmove()-half-of-RAM problem, then no, I don't think we can. > 2. should we put pc-dimm based memory hotunplug on backburner until we get > PAPR spec changes in place or > 3. start exploring lmb-pool model ? I'm not sure which of these will work best. From the qemu and kernel perspective alone, I think (3) will give sane results faster, but updating libvirt and the rest of the management stack to handle this different way of doing things might be an awful lot of extra work and time.
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e1202ce..f5b1ac2 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2174,6 +2174,85 @@ out: error_propagate(errp, local_err); } +typedef struct sPAPRDIMMState { + uint32_t nr_lmbs; +} sPAPRDIMMState; + +static void spapr_lmb_release(DeviceState *dev, void *opaque) +{ + sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque; + HotplugHandler *hotplug_ctrl = NULL; + Error *local_err = NULL; + + if (--ds->nr_lmbs) { + return; + } + + g_free(ds); + + /* + * Now that all the LMBs have been removed by the guest, call the + * pc-dimm unplug handler to cleanup up the pc-dimm device. + */ + hotplug_ctrl = qdev_get_hotplug_handler(dev); + hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); +} + +static void spapr_del_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, + Error **errp) +{ + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE; + Error *local_err = NULL; + int i; + sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState)); + + ds->nr_lmbs = nr_lmbs; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr/SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->detach(drc, dev, spapr_lmb_release, ds, &local_err); + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + spapr_hotplug_req_remove_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs); +} + +static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + + pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); + object_unparent(OBJECT(dev)); +} + +static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + Error *local_err = NULL; + PCDIMMDevice *dimm = PC_DIMM(dev); + 
PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t size = memory_region_size(mr); + uint64_t addr; + + addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + goto out; + } + + spapr_del_lmbs(dev, addr, size, &local_err); +out: + error_propagate(errp, local_err); +} + static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -2221,7 +2300,15 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - error_setg(errp, "Memory hot unplug not supported by sPAPR"); + spapr_memory_unplug(hotplug_dev, dev, errp); + } +} + +static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + spapr_memory_unplug_request(hotplug_dev, dev, errp); } } @@ -2263,6 +2350,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) hc->plug = spapr_machine_device_plug; hc->unplug = spapr_machine_device_unplug; mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; + hc->unplug_request = spapr_machine_device_unplug_request; smc->dr_lmb_enabled = false; fwc->get_dev_path = spapr_get_fw_dev_path; diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 5d6ea7c..59b6ea9 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -11,6 +11,7 @@ */ #include "hw/ppc/spapr_drc.h" +#include "hw/ppc/spapr.h" #include "qom/object.h" #include "hw/qdev.h" #include "qapi/visitor.h" @@ -77,6 +78,23 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, } } + /* + * Fail any request to ISOLATE the LMB DRC if this LMB doesn't + * belong to a DIMM device that is marked for removal. 
+ * + * Currently the guest userspace tool drmgr that drives the memory + * hotplug/unplug will just try to remove a set of 'removable' LMBs + * in response to a hot unplug request that is based on drc-count. + * If the LMB being removed doesn't belong to a DIMM device that is + * actually being unplugged, fail the isolation request here. + */ + if (drc->type == SPAPR_DR_CONNECTOR_TYPE_LMB) { + if ((state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && + !drc->awaiting_release) { + return RTAS_OUT_HW_ERROR; + } + } + drc->isolation_state = state; if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) {
Add support to hot remove pc-dimm memory devices. TODO: In response to memory hot removal operation on a DIMM device, guest kernel might refuse to offline a few LMBs that are part of that device. In such cases, we will have a DIMM device that has some LMBs online and some LMBs offline. To avoid this situation, drmgr could be enhanced to support a command line option that results in removal of all the LMBs or none. Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> --- Changes in v1: - Got rid of the patch that introduced a field in PCDIMMDevice to track DIMM marked for removal since we can track that within the DRC object. - Removed the patch that added return value to rtas_set_indicator() since the required changes are already pushed by Michael Roth. v0: hw/ppc/spapr.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++- hw/ppc/spapr_drc.c | 18 +++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-)