Message ID | 1385001528-12003-14-git-send-email-imammedo@redhat.com |
---|---|
State | New |
Headers | show |
On Thu, Nov 21, 2013 at 03:38:34AM +0100, Igor Mammedov wrote: > - implements QEMU hardware part of memory hotplug protocol > described at "docs/specs/acpi_mem_hotplug.txt" > - handles only memory add notification event for now > > Signed-off-by: Igor Mammedov <imammedo@redhat.com> > --- > docs/specs/acpi_mem_hotplug.txt | 38 ++++++++++ > hw/acpi/core.c | 144 +++++++++++++++++++++++++++++++++++++++ > include/hw/acpi/acpi.h | 24 +++++++ > 3 files changed, 206 insertions(+), 0 deletions(-) > create mode 100644 docs/specs/acpi_mem_hotplug.txt > > diff --git a/docs/specs/acpi_mem_hotplug.txt b/docs/specs/acpi_mem_hotplug.txt > new file mode 100644 > index 0000000..fc34142 > --- /dev/null > +++ b/docs/specs/acpi_mem_hotplug.txt > @@ -0,0 +1,38 @@ > +QEMU<->ACPI BIOS memory hotplug interface > +-------------------------------------- > + > +ACPI BIOS GPE.3 handler is dedicated for notifying OS about memory hot-add > +or hot-remove events. > + > +Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access): > +--------------------------------------------------------------- > +0xa00: > + read access: > + [0x0-0x3] Lo part of memory device phys address > + [0x4-0x7] Hi part of memory device phys address > + [0x8-0xb] Lo part of memory device size in bytes > + [0xc-0xf] Hi part of memory device size in bytes > + [0x14] highest memory hot-plug interface version supported by QEMU So this can make guest fail gracefully but it appears that detecting guest version would be nicer? It would let us actually support old guests ... > + [0x15] Memory device status fields > + bits: > + 1: device is enabled and may be used by guest > + 2: device insert event, used by ACPI BIOS to distinguish > + device for which no device check event to OSPM was issued what does the above mean? what if device is not present? > + [0x16-0x17] reserved > + > + write access: > + [0x0-0x3] Memory device slot selector, selects active memory device. > + All following accesses to other registers in 0xaf80-0xaf97 > + region will read/store data from/to selected memory device. > + [0x4-0x7] OST event code reported by OSPM > + [0x8-0xb] OST status code reported by OSPM > + [0x15] Memory device status fields this is control, not status? > + bits: > + 2: if set to 1 clears device insert event, set by ACPI BIOS > + after it has sent device check event to OSPM for > + seleted memory device selected? How about we actually require guest to enable memory? This way if we hotplug, but old guest does not enable and puts a PCI device there, it just works. > + > +Selecting memory device slot beyond present range has no effect on platform: > + - not documented above write accesses to memory hot-plug registers > + are ignored; > + - not documented above read accesses to memory hot-plug registers return 0xFF 00 would be cleaner (matches not enabled - no event)? > diff --git a/hw/acpi/core.c b/hw/acpi/core.c > index 8c0d48c..18e169c 100644 > --- a/hw/acpi/core.c > +++ b/hw/acpi/core.c > @@ -680,3 +680,147 @@ void acpi_update_sci(ACPIREGS *regs, qemu_irq irq, uint32_t gpe0_sts_mask) > (regs->pm1.evt.en & ACPI_BITMASK_TIMER_ENABLE) && > !(pm1a_sts & ACPI_BITMASK_TIMER_STATUS)); > } > + > +static uint64_t acpi_memory_hotplug_read(void *opaque, hwaddr addr, > + unsigned int size) > +{ > + uint32_t val = 0; > + MemHotplugState *mem_st = opaque; > + MemStatus *mdev; > + > + if (mem_st->selector >= mem_st->dev_count) { > + return 0; > + } > + > + mdev = &mem_st->devs[mem_st->selector]; > + switch (addr) { > + case 0x0: /* Lo part of phys address where DIMM is mapped */ > + val = object_property_get_int(OBJECT(mdev->dimm), "start", NULL); > + break; > + case 0x4: /* Hi part of phys address where DIMM is mapped */ > + val = object_property_get_int(OBJECT(mdev->dimm), "start", NULL) >> 32; > + break; > + case 0x8: /* Lo part of DIMM size */ > + val = object_property_get_int(OBJECT(mdev->dimm), "size", NULL); > + break; > + case 0xc: /* Hi part of DIMM size */ > + val = object_property_get_int(OBJECT(mdev->dimm), "size", NULL) >> 32; > + break; > + case 0x10: /* node proximity for _PXM method */ > + val = object_property_get_int(OBJECT(mdev->dimm), "node", NULL); > + break; > + case 0x14: /* intf version */ > + val = 1; > + break; > + case 0x15: /* pack and return is_* fields */ > + val |= mdev->is_enabled ? 1 : 0; > + val |= mdev->is_inserting ? 2 : 0; > + break; > + } > + return val; > +} > + > +static void acpi_memory_hotplug_write(void *opaque, hwaddr addr, uint64_t data, > + unsigned int size) > +{ > + MemHotplugState *mem_st = opaque; > + MemStatus *mdev; > + > + if (!mem_st->dev_count) { > + return; > + } > + > + if (addr) { > + if (mem_st->selector >= mem_st->dev_count) { > + return; > + } > + } > + > + switch (addr) { > + case 0x0: /* DIMM slot selector */ > + mem_st->selector = data; > + break; > + case 0x4: /* _OST event */ > + mdev = &mem_st->devs[mem_st->selector]; > + if (data == 1) { > + /* TODO: handle device insert OST event */ > + } else if (data == 3) { > + /* TODO: handle device remove OST event */ > + } > + mdev->ost_event = data; > + break; > + case 0x8: /* _OST status */ > + mdev = &mem_st->devs[mem_st->selector]; > + mdev->ost_status = data; > + /* TODO: report async error */ > + /* TODO: implement memory removal on guest signal */ > + break; > + case 0x15: > + mdev = &mem_st->devs[mem_st->selector]; > + if (data & 2) { /* clear insert event */ > + mdev->is_inserting = false; > + } > + break; > + } > + > +} > +static const MemoryRegionOps acpi_memory_hotplug_ops = { > + .read = acpi_memory_hotplug_read, > + .write = acpi_memory_hotplug_write, > + .endianness = DEVICE_LITTLE_ENDIAN, > + .valid = { > + .min_access_size = 1, > + .max_access_size = 4, > + }, > +}; > + > +void acpi_memory_hotplug_init(Object *owner, MemoryRegion *io, > + MemHotplugState *state) > +{ > + QemuOpts *opts = qemu_opts_find(qemu_find_opts("memory-opts"), NULL); > + g_assert(opts); > + > + state->dev_count = qemu_opt_get_number(opts, "slots", 0); > + > + if (!state->dev_count) { > + return; > + } > + > + state->devs = g_malloc0(sizeof(*state->devs) * state->dev_count); > + memory_region_init_io(io, owner, &acpi_memory_hotplug_ops, state, > + "apci-mem-hotplug", ACPI_MEMORY_HOTPLUG_IO_LEN); > +} > + > +int acpi_memory_hotplug_cb(ACPIREGS *regs, MemHotplugState *mem_st, > + DeviceState *dev, HotplugState state) > +{ > + MemStatus *mdev; > + Error *local_err = NULL; > + int slot = object_property_get_int(OBJECT(dev), "slot", &local_err); > + > + if (error_is_set(&local_err)) { > + qerror_report_err(local_err); > + error_free(local_err); > + return -1; > + } > + > + if (slot >= mem_st->dev_count) { > + char *dev_path = object_get_canonical_path(OBJECT(dev)); > + qerror_report(ERROR_CLASS_GENERIC_ERROR, "acpi_memory_hotplug_cb: " > + "device [%s] returned invalid memory slot[%d]", > + dev_path, slot); > + g_free(dev_path); > + return -1; > + } > + > + mdev = &mem_st->devs[slot]; > + if (state == HOTPLUG_ENABLED) { > + mdev->dimm = dev; > + mdev->is_enabled = true; > + mdev->is_inserting = true; > + } > + > + /* do ACPI magic */ > + regs->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS; > + return 0; > +} > diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h > index c4ae7d7..e5717df 100644 > --- a/include/hw/acpi/acpi.h > +++ b/include/hw/acpi/acpi.h > @@ -168,6 +168,30 @@ uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr); > > void acpi_update_sci(ACPIREGS *acpi_regs, qemu_irq irq, uint32_t gpe0_sts_mask); > > +#define ACPI_MEMORY_HOTPLUG_IO_LEN 24 > +#define ACPI_MEMORY_HOTPLUG_BASE 0x0a00 > + > +#define ACPI_MEMORY_HOTPLUG_STATUS 8 > + > +typedef struct MemStatus { > + DeviceState *dimm; > + bool is_enabled; > + bool is_inserting; > + uint32_t ost_event; > + uint32_t ost_status; > +} MemStatus; > + > +typedef struct MemHotplugState { > + uint32_t selector; > + uint32_t dev_count; > + MemStatus *devs; > +} MemHotplugState; > + > +void acpi_memory_hotplug_init(Object *owner, MemoryRegion *io, > + MemHotplugState *state); > + > +int acpi_memory_hotplug_cb(ACPIREGS *regs, MemHotplugState *mem_st, > + DeviceState *dev, HotplugState state); > /* acpi.c */ > extern int acpi_enabled; > extern char unsigned *acpi_tables; > -- > 1.7.1
On Thu, 21 Nov 2013 11:42:02 +0200 "Michael S. Tsirkin" <mst@redhat.com> wrote: > On Thu, Nov 21, 2013 at 03:38:34AM +0100, Igor Mammedov wrote: > > - implements QEMU hardware part of memory hotplug protocol > > described at "docs/specs/acpi_mem_hotplug.txt" > > - handles only memory add notification event for now > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com> > > --- > > docs/specs/acpi_mem_hotplug.txt | 38 ++++++++++ > > hw/acpi/core.c | 144 +++++++++++++++++++++++++++++++++++++++ > > include/hw/acpi/acpi.h | 24 +++++++ > > 3 files changed, 206 insertions(+), 0 deletions(-) > > create mode 100644 docs/specs/acpi_mem_hotplug.txt > > > > diff --git a/docs/specs/acpi_mem_hotplug.txt b/docs/specs/acpi_mem_hotplug.txt > > new file mode 100644 > > index 0000000..fc34142 > > --- /dev/null > > +++ b/docs/specs/acpi_mem_hotplug.txt > > @@ -0,0 +1,38 @@ > > +QEMU<->ACPI BIOS memory hotplug interface > > +-------------------------------------- > > + > > +ACPI BIOS GPE.3 handler is dedicated for notifying OS about memory hot-add > > +or hot-remove events. > > + > > +Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access): > > +--------------------------------------------------------------- > > +0xa00: > > + read access: > > + [0x0-0x3] Lo part of memory device phys address > > + [0x4-0x7] Hi part of memory device phys address > > + [0x8-0xb] Lo part of memory device size in bytes > > + [0xc-0xf] Hi part of memory device size in bytes > > + [0x14] highest memory hot-plug interface version supported by QEMU > > So this can make guest fail gracefully but it appears that > detecting guest version would be nicer? > It would let us actually support old guests ... my idea of how to it was, guest writes its version into [0x14] register and reads QEMU version from it back, if they do not match then than BIOS ignores GPE.3 event effectively disabling hotplug on guest side. I haven't thought about supporting multiple implementations in QEMU though. Do we really want it? > > > + [0x15] Memory device status fields > > + bits: > > + 1: device is enabled and may be used by guest > > + 2: device insert event, used by ACPI BIOS to distinguish > > + device for which no device check event to OSPM was issued > > what does the above mean? After OSPM issued device check on selected device it clears this bit to mark event as handled. It allows to avoid keeping this state in ASL (as it's done for CPU hotplug, see CPON) > what if device is not present? ASL will issue device check and clear bit, it might be a bug since _STA would report not present but no eject event was issued. Papering over it ASL could check present bit first and issue device check only if it's present. > > > + [0x16-0x17] reserved > > + > > + write access: > > + [0x0-0x3] Memory device slot selector, selects active memory device. > > + All following accesses to other registers in 0xaf80-0xaf97 > > + region will read/store data from/to selected memory device. > > + [0x4-0x7] OST event code reported by OSPM > > + [0x8-0xb] OST status code reported by OSPM > > + [0x15] Memory device status fields > > this is control, not status? Thanks, I'll fix it. > > > + bits: > > + 2: if set to 1 clears device insert event, set by ACPI BIOS > > + after it has sent device check event to OSPM for > > + seleted memory device > > selected? see "write access: [0x0-0x3]" > > How about we actually require guest to enable memory? > > This way if we hotplug, but old guest does not enable > and puts a PCI device there, it just works. I've lost you here, could you elaborate pls? > > > + > > +Selecting memory device slot beyond present range has no effect on platform: > > + - not documented above write accesses to memory hot-plug registers > > + are ignored; > > + - not documented above read accesses to memory hot-plug registers return 0xFF > > 00 would be cleaner (matches not enabled - no event)? I'm following pattern, where reading from not present IO port returns 0xFF on hardware. Fact that ASL reads 0xFF could be used as not supported indication. > > > diff --git a/hw/acpi/core.c b/hw/acpi/core.c > > index 8c0d48c..18e169c 100644 > > --- a/hw/acpi/core.c > > +++ b/hw/acpi/core.c > > @@ -680,3 +680,147 @@ void acpi_update_sci(ACPIREGS *regs, qemu_irq irq, uint32_t gpe0_sts_mask) > > (regs->pm1.evt.en & ACPI_BITMASK_TIMER_ENABLE) && > > !(pm1a_sts & ACPI_BITMASK_TIMER_STATUS)); > > } > > + > > +static uint64_t acpi_memory_hotplug_read(void *opaque, hwaddr addr, > > + unsigned int size) > > +{ > > + uint32_t val = 0; > > + MemHotplugState *mem_st = opaque; > > + MemStatus *mdev; > > + > > + if (mem_st->selector >= mem_st->dev_count) { > > + return 0; > > + } > > + > > + mdev = &mem_st->devs[mem_st->selector]; > > + switch (addr) { > > + case 0x0: /* Lo part of phys address where DIMM is mapped */ > > + val = object_property_get_int(OBJECT(mdev->dimm), "start", NULL); > > + break; > > + case 0x4: /* Hi part of phys address where DIMM is mapped */ > > + val = object_property_get_int(OBJECT(mdev->dimm), "start", NULL) >> 32; > > + break; > > + case 0x8: /* Lo part of DIMM size */ > > + val = object_property_get_int(OBJECT(mdev->dimm), "size", NULL); > > + break; > > + case 0xc: /* Hi part of DIMM size */ > > + val = object_property_get_int(OBJECT(mdev->dimm), "size", NULL) >> 32; > > + break; > > + case 0x10: /* node proximity for _PXM method */ > > + val = object_property_get_int(OBJECT(mdev->dimm), "node", NULL); > > + break; > > + case 0x14: /* intf version */ > > + val = 1; > > + break; > > + case 0x15: /* pack and return is_* fields */ > > + val |= mdev->is_enabled ? 1 : 0; > > + val |= mdev->is_inserting ? 2 : 0; > > + break; > > + } > > + return val; > > +} > > + > > +static void acpi_memory_hotplug_write(void *opaque, hwaddr addr, uint64_t data, > > + unsigned int size) > > +{ > > + MemHotplugState *mem_st = opaque; > > + MemStatus *mdev; > > + > > + if (!mem_st->dev_count) { > > + return; > > + } > > + > > + if (addr) { > > + if (mem_st->selector >= mem_st->dev_count) { > > + return; > > + } > > + } > > + > > + switch (addr) { > > + case 0x0: /* DIMM slot selector */ > > + mem_st->selector = data; > > + break; > > + case 0x4: /* _OST event */ > > + mdev = &mem_st->devs[mem_st->selector]; > > + if (data == 1) { > > + /* TODO: handle device insert OST event */ > > + } else if (data == 3) { > > + /* TODO: handle device remove OST event */ > > + } > > + mdev->ost_event = data; > > + break; > > + case 0x8: /* _OST status */ > > + mdev = &mem_st->devs[mem_st->selector]; > > + mdev->ost_status = data; > > + /* TODO: report async error */ > > + /* TODO: implement memory removal on guest signal */ > > + break; > > + case 0x15: > > + mdev = &mem_st->devs[mem_st->selector]; > > + if (data & 2) { /* clear insert event */ > > + mdev->is_inserting = false; > > + } > > + break; > > + } > > + > > +} > > +static const MemoryRegionOps acpi_memory_hotplug_ops = { > > + .read = acpi_memory_hotplug_read, > > + .write = acpi_memory_hotplug_write, > > + .endianness = DEVICE_LITTLE_ENDIAN, > > + .valid = { > > + .min_access_size = 1, > > + .max_access_size = 4, > > + }, > > +}; > > + > > +void acpi_memory_hotplug_init(Object *owner, MemoryRegion *io, > > + MemHotplugState *state) > > +{ > > + QemuOpts *opts = qemu_opts_find(qemu_find_opts("memory-opts"), NULL); > > + g_assert(opts); > > + > > + state->dev_count = qemu_opt_get_number(opts, "slots", 0); > > + > > + if (!state->dev_count) { > > + return; > > + } > > + > > + state->devs = g_malloc0(sizeof(*state->devs) * state->dev_count); > > + memory_region_init_io(io, owner, &acpi_memory_hotplug_ops, state, > > + "apci-mem-hotplug", ACPI_MEMORY_HOTPLUG_IO_LEN); > > +} > > + > > +int acpi_memory_hotplug_cb(ACPIREGS *regs, MemHotplugState *mem_st, > > + DeviceState *dev, HotplugState state) > > +{ > > + MemStatus *mdev; > > + Error *local_err = NULL; > > + int slot = object_property_get_int(OBJECT(dev), "slot", &local_err); > > + > > + if (error_is_set(&local_err)) { > > + qerror_report_err(local_err); > > + error_free(local_err); > > + return -1; > > + } > > + > > + if (slot >= mem_st->dev_count) { > > + char *dev_path = object_get_canonical_path(OBJECT(dev)); > > + qerror_report(ERROR_CLASS_GENERIC_ERROR, "acpi_memory_hotplug_cb: " > > + "device [%s] returned invalid memory slot[%d]", > > + dev_path, slot); > > + g_free(dev_path); > > + return -1; > > + } > > + > > + mdev = &mem_st->devs[slot]; > > + if (state == HOTPLUG_ENABLED) { > > + mdev->dimm = dev; > > + mdev->is_enabled = true; > > + mdev->is_inserting = true; > > + } > > + > > + /* do ACPI magic */ > > + regs->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS; > > + return 0; > > +} > > diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h > > index c4ae7d7..e5717df 100644 > > --- a/include/hw/acpi/acpi.h > > +++ b/include/hw/acpi/acpi.h > > @@ -168,6 +168,30 @@ uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr); > > > > void acpi_update_sci(ACPIREGS *acpi_regs, qemu_irq irq, uint32_t gpe0_sts_mask); > > > > +#define ACPI_MEMORY_HOTPLUG_IO_LEN 24 > > +#define ACPI_MEMORY_HOTPLUG_BASE 0x0a00 > > + > > +#define ACPI_MEMORY_HOTPLUG_STATUS 8 > > + > > +typedef struct MemStatus { > > + DeviceState *dimm; > > + bool is_enabled; > > + bool is_inserting; > > + uint32_t ost_event; > > + uint32_t ost_status; > > +} MemStatus; > > + > > +typedef struct MemHotplugState { > > + uint32_t selector; > > + uint32_t dev_count; > > + MemStatus *devs; > > +} MemHotplugState; > > + > > +void acpi_memory_hotplug_init(Object *owner, MemoryRegion *io, > > + MemHotplugState *state); > > + > > +int acpi_memory_hotplug_cb(ACPIREGS *regs, MemHotplugState *mem_st, > > + DeviceState *dev, HotplugState state); > > /* acpi.c */ > > extern int acpi_enabled; > > extern char unsigned *acpi_tables; > > -- > > 1.7.1
On Thu, Nov 21, 2013 at 03:21:37PM +0100, Igor Mammedov wrote: > On Thu, 21 Nov 2013 11:42:02 +0200 > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > On Thu, Nov 21, 2013 at 03:38:34AM +0100, Igor Mammedov wrote: > > > - implements QEMU hardware part of memory hotplug protocol > > > described at "docs/specs/acpi_mem_hotplug.txt" > > > - handles only memory add notification event for now > > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com> > > > --- > > > docs/specs/acpi_mem_hotplug.txt | 38 ++++++++++ > > > hw/acpi/core.c | 144 +++++++++++++++++++++++++++++++++++++++ > > > include/hw/acpi/acpi.h | 24 +++++++ > > > 3 files changed, 206 insertions(+), 0 deletions(-) > > > create mode 100644 docs/specs/acpi_mem_hotplug.txt > > > > > > diff --git a/docs/specs/acpi_mem_hotplug.txt b/docs/specs/acpi_mem_hotplug.txt > > > new file mode 100644 > > > index 0000000..fc34142 > > > --- /dev/null > > > +++ b/docs/specs/acpi_mem_hotplug.txt > > > @@ -0,0 +1,38 @@ > > > +QEMU<->ACPI BIOS memory hotplug interface > > > +-------------------------------------- > > > + > > > +ACPI BIOS GPE.3 handler is dedicated for notifying OS about memory hot-add > > > +or hot-remove events. > > > + > > > +Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access): > > > +--------------------------------------------------------------- > > > +0xa00: > > > + read access: > > > + [0x0-0x3] Lo part of memory device phys address > > > + [0x4-0x7] Hi part of memory device phys address > > > + [0x8-0xb] Lo part of memory device size in bytes > > > + [0xc-0xf] Hi part of memory device size in bytes > > > + [0x14] highest memory hot-plug interface version supported by QEMU > > > > So this can make guest fail gracefully but it appears that > > detecting guest version would be nicer? > > It would let us actually support old guests ... > > my idea of how to it was, > guest writes its version into [0x14] register and reads QEMU version > from it back, if they do not match then than BIOS ignores GPE.3 event > effectively disabling hotplug on guest side. > I haven't thought about supporting multiple implementations in QEMU though. > Do we really want it? I'm talking about old bios which does not read acpi from qemu. We want it to work even if it can't see hotplugged memory. > > > > > + [0x15] Memory device status fields > > > + bits: > > > + 1: device is enabled and may be used by guest > > > + 2: device insert event, used by ACPI BIOS to distinguish > > > + device for which no device check event to OSPM was issued > > > > what does the above mean? > After OSPM issued device check on selected device it clears this bit to mark event > as handled. > It allows to avoid keeping this state in ASL (as it's done for CPU hotplug, see CPON) That's fine. > > what if device is not present? > ASL will issue device check and clear bit, it might be a bug since _STA would report > not present but no eject event was issued. > > Papering over it ASL could check present bit first and issue device check only if > it's present. Is this a problem? If yes - that will still be racy won't it? Also, should guest reset eject memory that we requested unplug for? > > > > > + [0x16-0x17] reserved > > > + > > > + write access: > > > + [0x0-0x3] Memory device slot selector, selects active memory device. > > > + All following accesses to other registers in 0xaf80-0xaf97 > > > + region will read/store data from/to selected memory device. > > > + [0x4-0x7] OST event code reported by OSPM > > > + [0x8-0xb] OST status code reported by OSPM > > > + [0x15] Memory device status fields > > > > this is control, not status? > Thanks, I'll fix it. > > > > > > + bits: > > > + 2: if set to 1 clears device insert event, set by ACPI BIOS > > > + after it has sent device check event to OSPM for > > > + seleted memory device > > > > selected? > see "write access: [0x0-0x3]" yes but you have a typo above > > > > How about we actually require guest to enable memory? > > > > This way if we hotplug, but old guest does not enable > > and puts a PCI device there, it just works. > I've lost you here, could you elaborate pls? Assume qemu adds memory by hotplug. Is it immediately enabled? I suggest it's not enabled, and only enable after ACPI enables it (or after reboot?) > > > > > + > > > +Selecting memory device slot beyond present range has no effect on platform: > > > + - not documented above write accesses to memory hot-plug registers > > > + are ignored; > > > + - not documented above read accesses to memory hot-plug registers return 0xFF > > > > 00 would be cleaner (matches not enabled - no event)? > I'm following pattern, where reading from not present IO port returns 0xFF on hardware. > Fact that ASL reads 0xFF could be used as not supported indication. But isn't this a valid pattern when all bits are set? > > > > > diff --git a/hw/acpi/core.c b/hw/acpi/core.c > > > index 8c0d48c..18e169c 100644 > > > --- a/hw/acpi/core.c > > > +++ b/hw/acpi/core.c > > > @@ -680,3 +680,147 @@ void acpi_update_sci(ACPIREGS *regs, qemu_irq irq, uint32_t gpe0_sts_mask) > > > (regs->pm1.evt.en & ACPI_BITMASK_TIMER_ENABLE) && > > > !(pm1a_sts & ACPI_BITMASK_TIMER_STATUS)); > > > } > > > + > > > +static uint64_t acpi_memory_hotplug_read(void *opaque, hwaddr addr, > > > + unsigned int size) > > > +{ > > > + uint32_t val = 0; > > > + MemHotplugState *mem_st = opaque; > > > + MemStatus *mdev; > > > + > > > + if (mem_st->selector >= mem_st->dev_count) { > > > + return 0; > > > + } > > > + > > > + mdev = &mem_st->devs[mem_st->selector]; > > > + switch (addr) { > > > + case 0x0: /* Lo part of phys address where DIMM is mapped */ > > > + val = object_property_get_int(OBJECT(mdev->dimm), "start", NULL); > > > + break; > > > + case 0x4: /* Hi part of phys address where DIMM is mapped */ > > > + val = object_property_get_int(OBJECT(mdev->dimm), "start", NULL) >> 32; > > > + break; > > > + case 0x8: /* Lo part of DIMM size */ > > > + val = object_property_get_int(OBJECT(mdev->dimm), "size", NULL); > > > + break; > > > + case 0xc: /* Hi part of DIMM size */ > > > + val = object_property_get_int(OBJECT(mdev->dimm), "size", NULL) >> 32; > > > + break; > > > + case 0x10: /* node proximity for _PXM method */ > > > + val = object_property_get_int(OBJECT(mdev->dimm), "node", NULL); > > > + break; > > > + case 0x14: /* intf version */ > > > + val = 1; > > > + break; > > > + case 0x15: /* pack and return is_* fields */ > > > + val |= mdev->is_enabled ? 1 : 0; > > > + val |= mdev->is_inserting ? 2 : 0; > > > + break; > > > + } > > > + return val; > > > +} > > > + > > > +static void acpi_memory_hotplug_write(void *opaque, hwaddr addr, uint64_t data, > > > + unsigned int size) > > > +{ > > > + MemHotplugState *mem_st = opaque; > > > + MemStatus *mdev; > > > + > > > + if (!mem_st->dev_count) { > > > + return; > > > + } > > > + > > > + if (addr) { > > > + if (mem_st->selector >= mem_st->dev_count) { > > > + return; > > > + } > > > + } > > > + > > > + switch (addr) { > > > + case 0x0: /* DIMM slot selector */ > > > + mem_st->selector = data; > > > + break; > > > + case 0x4: /* _OST event */ > > > + mdev = &mem_st->devs[mem_st->selector]; > > > + if (data == 1) { > > > + /* TODO: handle device insert OST event */ > > > + } else if (data == 3) { > > > + /* TODO: handle device remove OST event */ > > > + } > > > + mdev->ost_event = data; > > > + break; > > > + case 0x8: /* _OST status */ > > > + mdev = &mem_st->devs[mem_st->selector]; > > > + mdev->ost_status = data; > > > + /* TODO: report async error */ > > > + /* TODO: implement memory removal on guest signal */ > > > + break; > > > + case 0x15: > > > + mdev = &mem_st->devs[mem_st->selector]; > > > + if (data & 2) { /* clear insert event */ > > > + mdev->is_inserting = false; > > > + } > > > + break; > > > + } > > > + > > > +} > > > +static const MemoryRegionOps acpi_memory_hotplug_ops = { > > > + .read = acpi_memory_hotplug_read, > > > + .write = acpi_memory_hotplug_write, > > > + .endianness = DEVICE_LITTLE_ENDIAN, > > > + .valid = { > > > + .min_access_size = 1, > > > + .max_access_size = 4, > > > + }, > > > +}; > > > + > > > +void acpi_memory_hotplug_init(Object *owner, MemoryRegion *io, > > > + MemHotplugState *state) > > > +{ > > > + QemuOpts *opts = qemu_opts_find(qemu_find_opts("memory-opts"), NULL); > > > + g_assert(opts); > > > + > > > + state->dev_count = qemu_opt_get_number(opts, "slots", 0); > > > + > > > + if (!state->dev_count) { > > > + return; > > > + } > > > + > > > + state->devs = g_malloc0(sizeof(*state->devs) * state->dev_count); > > > + memory_region_init_io(io, owner, &acpi_memory_hotplug_ops, state, > > > + "apci-mem-hotplug", ACPI_MEMORY_HOTPLUG_IO_LEN); > > > +} > > > + > > > +int acpi_memory_hotplug_cb(ACPIREGS *regs, MemHotplugState *mem_st, > > > + DeviceState *dev, HotplugState state) > > > +{ > > > + MemStatus *mdev; > > > + Error *local_err = NULL; > > > + int slot = object_property_get_int(OBJECT(dev), "slot", &local_err); > > > + > > > + if (error_is_set(&local_err)) { > > > + qerror_report_err(local_err); > > > + error_free(local_err); > > > + return -1; > > > + } > > > + > > > + if (slot >= mem_st->dev_count) { > > > + char *dev_path = object_get_canonical_path(OBJECT(dev)); > > > + qerror_report(ERROR_CLASS_GENERIC_ERROR, "acpi_memory_hotplug_cb: " > > > + "device [%s] returned invalid memory slot[%d]", > > > + dev_path, slot); > > > + g_free(dev_path); > > > + return -1; > > > + } > > > + > > > + mdev = &mem_st->devs[slot]; > > > + if (state == HOTPLUG_ENABLED) { > > > + mdev->dimm = dev; > > > + mdev->is_enabled = true; > > > + mdev->is_inserting = true; > > > + } > > > + > > > + /* do ACPI magic */ > > > + regs->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS; > > > + return 0; > > > +} > > > diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h > > > index c4ae7d7..e5717df 100644 > > > --- a/include/hw/acpi/acpi.h > > > +++ b/include/hw/acpi/acpi.h > > > @@ -168,6 +168,30 @@ uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr); > > > > > > void acpi_update_sci(ACPIREGS *acpi_regs, qemu_irq irq, uint32_t gpe0_sts_mask); > > > > > > +#define ACPI_MEMORY_HOTPLUG_IO_LEN 24 > > > +#define ACPI_MEMORY_HOTPLUG_BASE 0x0a00 > > > + > > > +#define ACPI_MEMORY_HOTPLUG_STATUS 8 > > > + > > > +typedef struct MemStatus { > > > + DeviceState *dimm; > > > + bool is_enabled; > > > + bool is_inserting; > > > + uint32_t ost_event; > > > + uint32_t ost_status; > > > +} MemStatus; > > > + > > > +typedef struct MemHotplugState { > > > + uint32_t selector; > > > + uint32_t dev_count; > > > + MemStatus *devs; > > > +} MemHotplugState; > > > + > > > +void acpi_memory_hotplug_init(Object *owner, MemoryRegion *io, > > > + MemHotplugState *state); > > > + > > > +int acpi_memory_hotplug_cb(ACPIREGS *regs, MemHotplugState *mem_st, > > > + DeviceState *dev, HotplugState state); > > > /* acpi.c */ > > > extern int acpi_enabled; > > > extern char unsigned *acpi_tables; > > > -- > > > 1.7.1
On Thu, 21 Nov 2013 16:38:47 +0200 "Michael S. Tsirkin" <mst@redhat.com> wrote: > On Thu, Nov 21, 2013 at 03:21:37PM +0100, Igor Mammedov wrote: > > On Thu, 21 Nov 2013 11:42:02 +0200 > > "Michael S. Tsirkin" <mst@redhat.com> wrote: [...] > > > > + [0x14] highest memory hot-plug interface version supported by QEMU > > > > > > So this can make guest fail gracefully but it appears that > > > detecting guest version would be nicer? > > > It would let us actually support old guests ... > > > > my idea of how to it was, > > guest writes its version into [0x14] register and reads QEMU version > > from it back, if they do not match then than BIOS ignores GPE.3 event > > effectively disabling hotplug on guest side. > > I haven't thought about supporting multiple implementations in QEMU though. > > Do we really want it? > > I'm talking about old bios which does not read acpi from qemu. > We want it to work even if it can't see hotplugged memory. to sum up cases that were discussed on IRC. * migration from NEW to OLD machine leads us to state when we run new BIOS with ACPI tables supporting memory hotplug on OLD machine. In general should work since NEW machine has to be started without memory hotplug enabled, which leads to disabling related ASL handler in ACPI tables. Case where NEW machine with memory hotplug enabled migrating to OLD machine without memory hotplug is invalid since migration will fail early due to missing devices (DimmBus/DIMM devices) * running OLD BIOS on new machine with memory hotplug turned on at QEMU CLI qemu -m X,slots=Y,maxmem=Z -bios old-bios-image -M pc-1.8 Problem here is that OLD BIOS doesn't know about 'etc/reserved-memory-end' fwcfg, so it can start mapping PCI BARs right after high memory (i.e. in memory hotplug reserved space) That's fine as far as there is no DIMM devices since access will fall through hotplug-mem container to PCI address space. But if DIMM device added at startup or runtime, its MemoryRegion will overshadow PCI BARs mapped at its range. If it were only hotadd case, then QEMU could start with enabled but not active memory hotplug and require BIOS to acknowledge (activate) hotlug so that adding DIMM devices wouldn't be possible without hotplug enabled BIOS. That would guarantee empty hotplug-mem container for OLD BIOS since it can't activate hotplug and avoid conflict. But if there are cold-plugged DIMM devices on QEMU CLI, that wouldn't work because OLD BIOS has no idea about them (i.e. it reads above4gb memory CMOS value) and it is bound to map 64-bit BARs in invalid place. It would be nice to run OLD BIOS on new machine and it will even work IF memory hotplug is not enabled. But I'm not sure we should care about case when not compatible BIOS is running on machine with active memory hotplug. > > > > > > > > + [0x15] Memory device status fields > > > > + bits: > > > > + 1: device is enabled and may be used by guest > > > > + 2: device insert event, used by ACPI BIOS to distinguish > > > > + device for which no device check event to OSPM was issued > > > > > > what does the above mean? > > After OSPM issued device check on selected device it clears this bit to mark event > > as handled. > > It allows to avoid keeping this state in ASL (as it's done for CPU hotplug, see CPON) > > That's fine. > > > > what if device is not present? > > ASL will issue device check and clear bit, it might be a bug since _STA would report > > not present but no eject event was issued. > > > > Papering over it ASL could check present bit first and issue device check only if > > it's present. > > Is this a problem? If yes - that will still be racy won't it? I thought about it some more and I don't see why it would be racy. bit 0x15.1 is set when there is initialized DIMM device in slot so 0x15.2 couldn't be set if 0x15.1 == 0. May be following description would be better: 2: device insert event, used by ACPI BIOS to distinguish device for which no device check event to OSPM was issued. Bit is valid only when 0x15.1 bit is set. It's possible to remove is_inserting bit altogether and just send device check to each present memory device. Device check will be NOP for memory devices that OSPM processed. That will work fine with small amount of slots but would increase load on guest in case of 1000s devices. That's the main reason for dedicated is_inserting bit, allowing to handle hotadd effectively. > > > Also, should guest reset eject memory that we requested unplug for? I'm not sure what you saying here in general. In particular unplug is not part of this series. [...] > > > selected? > > see "write access: [0x0-0x3]" > > yes but you have a typo above Ahh, blind me. Thanks, I'll fix it. > > > > > > > How about we actually require guest to enable memory? > > > > > > This way if we hotplug, but old guest does not enable > > > and puts a PCI device there, it just works. > > I've lost you here, could you elaborate pls? > > > Assume qemu adds memory by hotplug. > Is it immediately enabled? > I suggest it's not enabled, and only enable after ACPI > enables it (or after reboot?) I guess answer is in "running OLD BIOS on new machine with memory hotplug turned on" case above. i.e. Don't run not compatible BIOS with turned on memory hotplug. I think simpler interface/work-flow is better than a more complicated one, that is required to support not totally sane configuration that is bound to be broken anyway. > > > > > > > > + > > > > +Selecting memory device slot beyond present range has no effect on platform: > > > > + - not documented above write accesses to memory hot-plug registers > > > > + are ignored; > > > > + - not documented above read accesses to memory hot-plug registers return 0xFF > > > > > > 00 would be cleaner (matches not enabled - no event)? > > I'm following pattern, where reading from not present IO port returns 0xFF on hardware. > > Fact that ASL reads 0xFF could be used as not supported indication. > > But isn't this a valid pattern when all bits are set? It's to the same extent as 0x0. I'll change it to 0x0 if you still think it's better. [...]
On Fri, Nov 22, 2013 at 06:14:46PM +0100, Igor Mammedov wrote: > On Thu, 21 Nov 2013 16:38:47 +0200 > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > On Thu, Nov 21, 2013 at 03:21:37PM +0100, Igor Mammedov wrote: > > > On Thu, 21 Nov 2013 11:42:02 +0200 > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > [...] > > > > > + [0x14] highest memory hot-plug interface version supported by QEMU > > > > > > > > So this can make guest fail gracefully but it appears that > > > > detecting guest version would be nicer? > > > > It would let us actually support old guests ... > > > > > > my idea of how to it was, > > > guest writes its version into [0x14] register and reads QEMU version > > > from it back, if they do not match then than BIOS ignores GPE.3 event > > > effectively disabling hotplug on guest side. > > > I haven't thought about supporting multiple implementations in QEMU though. > > > Do we really want it? > > > > I'm talking about old bios which does not read acpi from qemu. > > We want it to work even if it can't see hotplugged memory. > to sum up cases that were discussed on IRC. > > * migration from NEW to OLD machine leads us to state when we run new BIOS with > ACPI tables supporting memory hotplug on OLD machine. > In general should work since NEW machine has to be started without memory > hotplug enabled, which leads to disabling related ASL handler in ACPI tables. > Case where NEW machine with memory hotplug enabled migrating to OLD machine > without memory hotplug is invalid since migration will fail early due to > missing devices (DimmBus/DIMM devices) > > * running OLD BIOS on new machine with memory hotplug turned on at QEMU CLI > qemu -m X,slots=Y,maxmem=Z -bios old-bios-image -M pc-1.8 > > Problem here is that OLD BIOS doesn't know about 'etc/reserved-memory-end' fwcfg, > so it can start mapping PCI BARs right after high memory (i.e. in memory hotplug > reserved space) > > That's fine as far as there is no DIMM devices since access will fall through > hotplug-mem container to PCI address space. > > But if DIMM device added at startup or runtime, its MemoryRegion will overshadow > PCI BARs mapped at its range. > > If it were only hotadd case, then QEMU could start with enabled but not active > memory hotplug and require BIOS to acknowledge (activate) hotlug so that adding > DIMM devices wouldn't be possible without hotplug enabled BIOS. > That would guarantee empty hotplug-mem container for OLD BIOS since it can't > activate hotplug and avoid conflict. > > But if there are cold-plugged DIMM devices on QEMU CLI, that wouldn't work > because OLD BIOS has no idea about them (i.e. it reads above4gb memory CMOS value) > and it is bound to map 64-bit BARs in invalid place. > > It would be nice to run OLD BIOS on new machine and it will even work IF memory > hotplug is not enabled. > But I'm not sure we should care about case when not compatible BIOS is running > on machine with active memory hotplug. It's not a must, I agree. It's just a useful excercise that might help us think of potential problems. Here this made me think of the following question: should not memory available on boot be listed in CMOS? Let's assume new BIOS gets interrupt telling it there's new memory. It clears the interrupt and then system is reset. Will system after reset detect the new hotplugged memory? How? > > > > > > > > > > > + [0x15] Memory device status fields > > > > > + bits: > > > > > + 1: device is enabled and may be used by guest > > > > > + 2: device insert event, used by ACPI BIOS to distinguish > > > > > + device for which no device check event to OSPM was issued > > > > > > > > what does the above mean? > > > After OSPM issued device check on selected device it clears this bit to mark event > > > as handled. > > > It allows to avoid keeping this state in ASL (as it's done for CPU hotplug, see CPON) > > > > That's fine. > > > > > > what if device is not present? > > > ASL will issue device check and clear bit, it might be a bug since _STA would report > > > not present but no eject event was issued. > > > > > > Papering over it ASL could check present bit first and issue device check only if > > > it's present. > > > > Is this a problem? If yes - that will still be racy won't it? > I thought about it some more and I don't see why it would be racy. > bit 0x15.1 is set when there is initialized DIMM device in slot so 0x15.2 couldn't > be set if 0x15.1 == 0. > > May be following description would be better: > > 2: device insert event, used by ACPI BIOS to distinguish > device for which no device check event to OSPM was issued. > Bit is valid only when 0x15.1 bit is set. > > It's possible to remove is_inserting bit altogether and just send device check to > each present memory device. Device check will be NOP for memory devices that OSPM > processed. > That will work fine with small amount of slots but would increase load on guest > in case of 1000s devices. That's the main reason for dedicated is_inserting bit, > allowing to handle hotadd effectively. > > > > > > > > Also, should guest reset eject memory that we requested unplug for? > I'm not sure what you saying here in general. > In particular unplug is not part of this series. > > > [...] > > > > selected? > > > see "write access: [0x0-0x3]" > > > > yes but you have a typo above > Ahh, blind me. Thanks, I'll fix it. > > > > > > > > > > > How about we actually require guest to enable memory? > > > > > > > > This way if we hotplug, but old guest does not enable > > > > and puts a PCI device there, it just works. > > > I've lost you here, could you elaborate pls? > > > > > > Assume qemu adds memory by hotplug. > > Is it immediately enabled? > > I suggest it's not enabled, and only enable after ACPI > > enables it (or after reboot?) > I guess answer is in "running OLD BIOS on new machine with memory hotplug turned on" > case above. i.e. Don't run not compatible BIOS with turned on memory hotplug. > I think simpler interface/work-flow is better than a more complicated one, > that is required to support not totally sane configuration that is bound to be broken anyway. > > > > > > > > > > > > + > > > > > +Selecting memory device slot beyond present range has no effect on platform: > > > > > + - not documented above write accesses to memory hot-plug registers > > > > > + are ignored; > > > > > + - not documented above read accesses to memory hot-plug registers return 0xFF > > > > > > > > 00 would be cleaner (matches not enabled - no event)? > > > I'm following pattern, where reading from not present IO port returns 0xFF on hardware. > > > Fact that ASL reads 0xFF could be used as not supported indication. > > > > But isn't this a valid pattern when all bits are set? > It's to the same extent as 0x0. I'll change it to 0x0 if you still think it's better. > > [...]
"Michael S. Tsirkin" <mst@redhat.com> writes: [...] > Here this made me think of the following question: > should not memory available on boot be listed in CMOS? > Let's assume new BIOS gets interrupt telling it there's new memory. > It clears the interrupt and then system is reset. > > Will system after reset detect the new hotplugged memory? > How? Do physical systems with hot-pluggable memory exist? How do they behave? [...]
Il 25/11/2013 08:27, Markus Armbruster ha scritto: > "Michael S. Tsirkin" <mst@redhat.com> writes: > > [...] >> Here this made me think of the following question: >> should not memory available on boot be listed in CMOS? >> Let's assume new BIOS gets interrupt telling it there's new memory. >> It clears the interrupt and then system is reset. >> >> Will system after reset detect the new hotplugged memory? Can it just scan all slots from MHPD._INI? For example on reset all slots could be disabled by the ACPIHotpluggableDimmBus (even coldplugged ones), and scanned + enabled by ASL. >> How? > > Do physical systems with hot-pluggable memory exist? How do they > behave? I guess they assume BIOS is updated in lockstep with the introduction of the feature. Paolo
On Sun, 24 Nov 2013 12:58:57 +0200 "Michael S. Tsirkin" <mst@redhat.com> wrote: > On Fri, Nov 22, 2013 at 06:14:46PM +0100, Igor Mammedov wrote: > > On Thu, 21 Nov 2013 16:38:47 +0200 > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > On Thu, Nov 21, 2013 at 03:21:37PM +0100, Igor Mammedov wrote: > > > > On Thu, 21 Nov 2013 11:42:02 +0200 > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > [...] > > > > > > + [0x14] highest memory hot-plug interface version supported by QEMU > > > > > > > > > > So this can make guest fail gracefully but it appears that > > > > > detecting guest version would be nicer? > > > > > It would let us actually support old guests ... > > > > > > > > my idea of how to it was, > > > > guest writes its version into [0x14] register and reads QEMU version > > > > from it back, if they do not match then than BIOS ignores GPE.3 event > > > > effectively disabling hotplug on guest side. > > > > I haven't thought about supporting multiple implementations in QEMU though. > > > > Do we really want it? > > > > > > I'm talking about old bios which does not read acpi from qemu. > > > We want it to work even if it can't see hotplugged memory. > > to sum up cases that were discussed on IRC. > > > > * migration from NEW to OLD machine leads us to state when we run new BIOS with > > ACPI tables supporting memory hotplug on OLD machine. > > In general should work since NEW machine has to be started without memory > > hotplug enabled, which leads to disabling related ASL handler in ACPI tables. > > Case where NEW machine with memory hotplug enabled migrating to OLD machine > > without memory hotplug is invalid since migration will fail early due to > > missing devices (DimmBus/DIMM devices) > > > > * running OLD BIOS on new machine with memory hotplug turned on at QEMU CLI > > qemu -m X,slots=Y,maxmem=Z -bios old-bios-image -M pc-1.8 > > > > Problem here is that OLD BIOS doesn't know about 'etc/reserved-memory-end' fwcfg, > > so it can start mapping PCI BARs right after high memory (i.e. in memory hotplug > > reserved space) > > > > That's fine as far as there is no DIMM devices since access will fall through > > hotplug-mem container to PCI address space. > > > > But if DIMM device added at startup or runtime, its MemoryRegion will overshadow > > PCI BARs mapped at its range. > > > > If it were only hotadd case, then QEMU could start with enabled but not active > > memory hotplug and require BIOS to acknowledge (activate) hotlug so that adding > > DIMM devices wouldn't be possible without hotplug enabled BIOS. > > That would guarantee empty hotplug-mem container for OLD BIOS since it can't > > activate hotplug and avoid conflict. > > > > But if there are cold-plugged DIMM devices on QEMU CLI, that wouldn't work > > because OLD BIOS has no idea about them (i.e. it reads above4gb memory CMOS value) > > and it is bound to map 64-bit BARs in invalid place. > > > > It would be nice to run OLD BIOS on new machine and it will even work IF memory > > hotplug is not enabled. > > But I'm not sure we should care about case when not compatible BIOS is running > > on machine with active memory hotplug. > > It's not a must, I agree. It's just a useful excercise that might > help us think of potential problems. > > Here this made me think of the following question: > should not memory available on boot be listed in CMOS? > Let's assume new BIOS gets interrupt telling it there's new memory. > It clears the interrupt and then system is reset. > > Will system after reset detect the new hotplugged memory? > How? I don't have memory hotplug enabled real hardware and can only deduce what is needed from guest behavior. So here is my observations: * windows: hotpluggable memory doesn't have to be in E820, OS sees and automatically onlines present hotpluggable memory. * linux: kernel enumerates present memory devices and sends events to udev. It's up to user space to provide udev scripts for onlining hotpluggable memory now. So in general rebooting at any time shouldn't hurt, and guest would see hotplugged memory. But linux kernel won't use it until udev onlines it. I see adding present at boot hotpluggable memory to E820 as a nice additional feature which is not must have but I'm looking to implementing it as follow-up, along with moving generation of related smbios tables into QEMU as you have done for ACPI tables. > > > > > > > > > > > > > > + [0x15] Memory device status fields > > > > > > + bits: > > > > > > + 1: device is enabled and may be used by guest > > > > > > + 2: device insert event, used by ACPI BIOS to distinguish > > > > > > + device for which no device check event to OSPM was issued > > > > > > > > > > what does the above mean? > > > > After OSPM issued device check on selected device it clears this bit to mark event > > > > as handled. > > > > It allows to avoid keeping this state in ASL (as it's done for CPU hotplug, see CPON) > > > > > > That's fine. > > > > > > > > what if device is not present? > > > > ASL will issue device check and clear bit, it might be a bug since _STA would report > > > > not present but no eject event was issued. > > > > > > > > Papering over it ASL could check present bit first and issue device check only if > > > > it's present. > > > > > > Is this a problem? If yes - that will still be racy won't it? > > I thought about it some more and I don't see why it would be racy. > > bit 0x15.1 is set when there is initialized DIMM device in slot so 0x15.2 couldn't > > be set if 0x15.1 == 0. > > > > May be following description would be better: > > > > 2: device insert event, used by ACPI BIOS to distinguish > > device for which no device check event to OSPM was issued. > > Bit is valid only when 0x15.1 bit is set. > > > > It's possible to remove is_inserting bit altogether and just send device check to > > each present memory device. Device check will be NOP for memory devices that OSPM > > processed. > > That will work fine with small amount of slots but would increase load on guest > > in case of 1000s devices. That's the main reason for dedicated is_inserting bit, > > allowing to handle hotadd effectively. > > > > > > > > > > > > > Also, should guest reset eject memory that we requested unplug for? > > I'm not sure what you saying here in general. > > In particular unplug is not part of this series. > > > > > > [...] > > > > > selected? > > > > see "write access: [0x0-0x3]" > > > > > > yes but you have a typo above > > Ahh, blind me. Thanks, I'll fix it. > > > > > > > > > > > > > > > How about we actually require guest to enable memory? > > > > > > > > > > This way if we hotplug, but old guest does not enable > > > > > and puts a PCI device there, it just works. > > > > I've lost you here, could you elaborate pls? > > > > > > > > > Assume qemu adds memory by hotplug. > > > Is it immediately enabled? > > > I suggest it's not enabled, and only enable after ACPI > > > enables it (or after reboot?) > > I guess answer is in "running OLD BIOS on new machine with memory hotplug turned on" > > case above. i.e. Don't run not compatible BIOS with turned on memory hotplug. > > I think simpler interface/work-flow is better than a more complicated one, > > that is required to support not totally sane configuration that is bound to be broken anyway. > > > > > > > > > > > > > > > > + > > > > > > +Selecting memory device slot beyond present range has no effect on platform: > > > > > > + - not documented above write accesses to memory hot-plug registers > > > > > > + are ignored; > > > > > > + - not documented above read accesses to memory hot-plug registers return 0xFF > > > > > > > > > > 00 would be cleaner (matches not enabled - no event)? > > > > I'm following pattern, where reading from not present IO port returns 0xFF on hardware. > > > > Fact that ASL reads 0xFF could be used as not supported indication. > > > > > > But isn't this a valid pattern when all bits are set? > > It's to the same extent as 0x0. I'll change it to 0x0 if you still think it's better. > > > > [...]
On Mon, 25 Nov 2013 14:45:15 +0100 Paolo Bonzini <pbonzini@redhat.com> wrote: > Il 25/11/2013 08:27, Markus Armbruster ha scritto: > > "Michael S. Tsirkin" <mst@redhat.com> writes: > > > > [...] > >> Here this made me think of the following question: > >> should not memory available on boot be listed in CMOS? > >> Let's assume new BIOS gets interrupt telling it there's new memory. > >> It clears the interrupt and then system is reset. > >> > >> Will system after reset detect the new hotplugged memory? > > Can it just scan all slots from MHPD._INI? > > For example on reset all slots could be disabled by the > ACPIHotpluggableDimmBus (even coldplugged ones), and scanned + enabled > by ASL. memory devices are scanned by OSPM during ACPI tables parsing and guest (can) use present devices right away. Currently I don't see necessity for enabling individual memory devices from ASL since it adds one more round trip for QEMU<->guest protocol for no particular benefit. The only use case Michael suggested would be running OLD BIOS (without memory hotplug support) on machine with actively used memory hotplug (if OLD BIOS runs on machine with memory hotplug but there is no hotplugged memory devices, it runs just fine), so case looks like not worth an effort and complexity it would bring. I believe having completely functional DIMM device after its realize() method is completed is more strait-forward and easier to understand/support than having guest to complete it's initialization. That would allow later to put present at boot DIMMs into E820 (way before ASL is executed) if that would be necessary and also help to convert initial memory into hotpluggable one as well. > > >> How? > > > > Do physical systems with hot-pluggable memory exist? How do they > > behave? > > I guess they assume BIOS is updated in lockstep with the introduction of > the feature. > > Paolo >
Il 25/11/2013 15:18, Igor Mammedov ha scritto: >> > For example on reset all slots could be disabled by the >> > ACPIHotpluggableDimmBus (even coldplugged ones), and scanned + enabled >> > by ASL. > memory devices are scanned by OSPM during ACPI tables parsing and > guest (can) use present devices right away. Yes, what I'm saying is scan them to enable them so that OSPM sees them already as online. > Currently I don't see necessity for enabling individual memory devices from > ASL since it adds one more round trip for QEMU<->guest protocol for no > particular benefit. The only use case Michael suggested would be running > OLD BIOS (without memory hotplug support) on machine with actively used > memory hotplug (if OLD BIOS runs on machine with memory hotplug but there is > no hotplugged memory devices, it runs just fine), so case looks like not worth ^^^^^^^^^^ *Hotpluggable*, not hotplugged. It would also break just with coldplugged memory devices. > That would allow later to put present at boot DIMMs into E820 (way before ASL > is executed) if that would be necessary and also help to convert initial > memory into hotpluggable one as well. I am afraid that mixing hot-unplug with E820 could cause some unanticipated trouble. OSes may not expect memory in the E820 ranges to disappear. But since we're speaking about e820, do guests work if you make the hotpluggable memory region reserved in the e820? That should work with old BIOS too using QEMU_CFG_E820_TABLE. If that works, I see no need for an "enabled" bit either. Paolo > an effort and complexity it would bring.
On Mon, 25 Nov 2013 15:31:22 +0100 Paolo Bonzini <pbonzini@redhat.com> wrote: > Il 25/11/2013 15:18, Igor Mammedov ha scritto: > >> > For example on reset all slots could be disabled by the > >> > ACPIHotpluggableDimmBus (even coldplugged ones), and scanned + enabled > >> > by ASL. > > memory devices are scanned by OSPM during ACPI tables parsing and > > guest (can) use present devices right away. > > Yes, what I'm saying is scan them to enable them so that OSPM sees them > already as online. > > > Currently I don't see necessity for enabling individual memory devices from > > ASL since it adds one more round trip for QEMU<->guest protocol for no > > particular benefit. The only use case Michael suggested would be running > > OLD BIOS (without memory hotplug support) on machine with actively used > > memory hotplug (if OLD BIOS runs on machine with memory hotplug but there is > > no hotplugged memory devices, it runs just fine), so case looks like not worth > ^^^^^^^^^^ > > *Hotpluggable*, not hotplugged. It would also break just with > coldplugged memory devices. > > > That would allow later to put present at boot DIMMs into E820 (way before ASL > > is executed) if that would be necessary and also help to convert initial > > memory into hotpluggable one as well. > > I am afraid that mixing hot-unplug with E820 could cause some > unanticipated trouble. OSes may not expect memory in the E820 ranges to > disappear. > > But since we're speaking about e820, do guests work if you make the > hotpluggable memory region reserved in the e820? That should work with > old BIOS too using QEMU_CFG_E820_TABLE. If that works, I see no need > for an "enabled" bit either. It's on my to do list to make more code reading/experiments/testing to see if that is ok with guest. I can't say it right now, that's why E820 changes is not part of this series. As for OLD BIOS it should be aware about not continuous memory ranges and calculate present ram below/above4g via E820, there was Gerd's patch to introduce new E820 fwcfg from QEMU and I hope to use/extend it for telling BIOS about present coldplugged DIMMs. But I guess it's not for OLD BIOS anyway, since it OLD SeaBIOS uses CMOS for getting ram sizes. > > Paolo > > > an effort and complexity it would bring. >
On 11/20/2013 07:38 PM, Igor Mammedov wrote: > - implements QEMU hardware part of memory hotplug protocol > described at "docs/specs/acpi_mem_hotplug.txt" > - handles only memory add notification event for now > > Signed-off-by: Igor Mammedov <imammedo@redhat.com> > --- > +0xa00: > + read access: > + [0x0-0x3] Lo part of memory device phys address > + [0x4-0x7] Hi part of memory device phys address > + [0x8-0xb] Lo part of memory device size in bytes > + [0xc-0xf] Hi part of memory device size in bytes > + [0x14] highest memory hot-plug interface version supported by QEMU Nothing about [0x10-0x13]? [Ah, I see you mention it later] > + [0x15] Memory device status fields > + bits: > + 1: device is enabled and may be used by guest > + 2: device insert event, used by ACPI BIOS to distinguish > + device for which no device check event to OSPM was issued Why start at bit 1 instead of bit 0? Also, should you document that remaining bits (2-7) should be ignored on read? > + [0x16-0x17] reserved > + > + write access: > + [0x0-0x3] Memory device slot selector, selects active memory device. > + All following accesses to other registers in 0xaf80-0xaf97 > + region will read/store data from/to selected memory device. > + [0x4-0x7] OST event code reported by OSPM > + [0x8-0xb] OST status code reported by OSPM > + [0x15] Memory device status fields Nothing about behavior of [0xc-0x14]? Is it an error to write there, or are the writes just ignored, or...? > + bits: > + 2: if set to 1 clears device insert event, set by ACPI BIOS > + after it has sent device check event to OSPM for > + seleted memory device s/seleted/selected/ What must be written into the other bits? Must reserved bits be written as 0, or the user do a read-modify-write, or...? > + > +Selecting memory device slot beyond present range has no effect on platform: > + - not documented above write accesses to memory hot-plug registers > + are ignored; Reads awkwardly. Better is: write accesses to memory hot-plug registers not documented above are ignored > + - not documented above read accesses to memory hot-plug registers return 0xFF Similar awkward wording. > + case 0xc: /* Hi part of DIMM size */ > + val = object_property_get_int(OBJECT(mdev->dimm), "size", NULL) >> 32; > + break; > + case 0x10: /* node proximity for _PXM method */ > + val = object_property_get_int(OBJECT(mdev->dimm), "node", NULL); > + break; This wasn't documented. > + case 0x14: /* intf version */ > + val = 1; > + break; > + case 0x15: /* pack and return is_* fields */ > + val |= mdev->is_enabled ? 1 : 0; > + val |= mdev->is_inserting ? 2 : 0; This is bit 0 and 1, not bit 1 and 2.
diff --git a/docs/specs/acpi_mem_hotplug.txt b/docs/specs/acpi_mem_hotplug.txt new file mode 100644 index 0000000..fc34142 --- /dev/null +++ b/docs/specs/acpi_mem_hotplug.txt @@ -0,0 +1,38 @@ +QEMU<->ACPI BIOS memory hotplug interface +-------------------------------------- + +ACPI BIOS GPE.3 handler is dedicated for notifying OS about memory hot-add +or hot-remove events. + +Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access): +--------------------------------------------------------------- +0xa00: + read access: + [0x0-0x3] Lo part of memory device phys address + [0x4-0x7] Hi part of memory device phys address + [0x8-0xb] Lo part of memory device size in bytes + [0xc-0xf] Hi part of memory device size in bytes + [0x14] highest memory hot-plug interface version supported by QEMU + [0x15] Memory device status fields + bits: + 1: device is enabled and may be used by guest + 2: device insert event, used by ACPI BIOS to distinguish + device for which no device check event to OSPM was issued + [0x16-0x17] reserved + + write access: + [0x0-0x3] Memory device slot selector, selects active memory device. + All following accesses to other registers in 0xaf80-0xaf97 + region will read/store data from/to selected memory device. + [0x4-0x7] OST event code reported by OSPM + [0x8-0xb] OST status code reported by OSPM + [0x15] Memory device status fields + bits: + 2: if set to 1 clears device insert event, set by ACPI BIOS + after it has sent device check event to OSPM for + seleted memory device + +Selecting memory device slot beyond present range has no effect on platform: + - not documented above write accesses to memory hot-plug registers + are ignored; + - not documented above read accesses to memory hot-plug registers return 0xFF diff --git a/hw/acpi/core.c b/hw/acpi/core.c index 8c0d48c..18e169c 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -680,3 +680,147 @@ void acpi_update_sci(ACPIREGS *regs, qemu_irq irq, uint32_t gpe0_sts_mask) (regs->pm1.evt.en & ACPI_BITMASK_TIMER_ENABLE) && !(pm1a_sts & ACPI_BITMASK_TIMER_STATUS)); } + +static uint64_t acpi_memory_hotplug_read(void *opaque, hwaddr addr, + unsigned int size) +{ + uint32_t val = 0; + MemHotplugState *mem_st = opaque; + MemStatus *mdev; + + if (mem_st->selector >= mem_st->dev_count) { + return 0; + } + + mdev = &mem_st->devs[mem_st->selector]; + switch (addr) { + case 0x0: /* Lo part of phys address where DIMM is mapped */ + val = object_property_get_int(OBJECT(mdev->dimm), "start", NULL); + break; + case 0x4: /* Hi part of phys address where DIMM is mapped */ + val = object_property_get_int(OBJECT(mdev->dimm), "start", NULL) >> 32; + break; + case 0x8: /* Lo part of DIMM size */ + val = object_property_get_int(OBJECT(mdev->dimm), "size", NULL); + break; + case 0xc: /* Hi part of DIMM size */ + val = object_property_get_int(OBJECT(mdev->dimm), "size", NULL) >> 32; + break; + case 0x10: /* node proximity for _PXM method */ + val = object_property_get_int(OBJECT(mdev->dimm), "node", NULL); + break; + case 0x14: /* intf version */ + val = 1; + break; + case 0x15: /* pack and return is_* fields */ + val |= mdev->is_enabled ? 1 : 0; + val |= mdev->is_inserting ? 2 : 0; + break; + } + return val; +} + +static void acpi_memory_hotplug_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) +{ + MemHotplugState *mem_st = opaque; + MemStatus *mdev; + + if (!mem_st->dev_count) { + return; + } + + if (addr) { + if (mem_st->selector >= mem_st->dev_count) { + return; + } + } + + switch (addr) { + case 0x0: /* DIMM slot selector */ + mem_st->selector = data; + break; + case 0x4: /* _OST event */ + mdev = &mem_st->devs[mem_st->selector]; + if (data == 1) { + /* TODO: handle device insert OST event */ + } else if (data == 3) { + /* TODO: handle device remove OST event */ + } + mdev->ost_event = data; + break; + case 0x8: /* _OST status */ + mdev = &mem_st->devs[mem_st->selector]; + mdev->ost_status = data; + /* TODO: report async error */ + /* TODO: implement memory removal on guest signal */ + break; + case 0x15: + mdev = &mem_st->devs[mem_st->selector]; + if (data & 2) { /* clear insert event */ + mdev->is_inserting = false; + } + break; + } + +} +static const MemoryRegionOps acpi_memory_hotplug_ops = { + .read = acpi_memory_hotplug_read, + .write = acpi_memory_hotplug_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 4, + }, +}; + +void acpi_memory_hotplug_init(Object *owner, MemoryRegion *io, + MemHotplugState *state) +{ + QemuOpts *opts = qemu_opts_find(qemu_find_opts("memory-opts"), NULL); + g_assert(opts); + + state->dev_count = qemu_opt_get_number(opts, "slots", 0); + + if (!state->dev_count) { + return; + } + + state->devs = g_malloc0(sizeof(*state->devs) * state->dev_count); + memory_region_init_io(io, owner, &acpi_memory_hotplug_ops, state, + "apci-mem-hotplug", ACPI_MEMORY_HOTPLUG_IO_LEN); +} + +int acpi_memory_hotplug_cb(ACPIREGS *regs, MemHotplugState *mem_st, + DeviceState *dev, HotplugState state) +{ + MemStatus *mdev; + Error *local_err = NULL; + int slot = object_property_get_int(OBJECT(dev), "slot", &local_err); + + if (error_is_set(&local_err)) { + qerror_report_err(local_err); + error_free(local_err); + return -1; + } + + if (slot >= mem_st->dev_count) { + char *dev_path = object_get_canonical_path(OBJECT(dev)); + qerror_report(ERROR_CLASS_GENERIC_ERROR, "acpi_memory_hotplug_cb: " + "device [%s] returned invalid memory slot[%d]", + dev_path, slot); + g_free(dev_path); + return -1; + } + + mdev = &mem_st->devs[slot]; + if (state == HOTPLUG_ENABLED) { + mdev->dimm = dev; + mdev->is_enabled = true; + mdev->is_inserting = true; + } + + /* do ACPI magic */ + regs->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS; + return 0; +} diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h index c4ae7d7..e5717df 100644 --- a/include/hw/acpi/acpi.h +++ b/include/hw/acpi/acpi.h @@ -168,6 +168,30 @@ uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr); void acpi_update_sci(ACPIREGS *acpi_regs, qemu_irq irq, uint32_t gpe0_sts_mask); +#define ACPI_MEMORY_HOTPLUG_IO_LEN 24 +#define ACPI_MEMORY_HOTPLUG_BASE 0x0a00 + +#define ACPI_MEMORY_HOTPLUG_STATUS 8 + +typedef struct MemStatus { + DeviceState *dimm; + bool is_enabled; + bool is_inserting; + uint32_t ost_event; + uint32_t ost_status; +} MemStatus; + +typedef struct MemHotplugState { + uint32_t selector; + uint32_t dev_count; + MemStatus *devs; +} MemHotplugState; + +void acpi_memory_hotplug_init(Object *owner, MemoryRegion *io, + MemHotplugState *state); + +int acpi_memory_hotplug_cb(ACPIREGS *regs, MemHotplugState *mem_st, + DeviceState *dev, HotplugState state); /* acpi.c */ extern int acpi_enabled; extern char unsigned *acpi_tables;
- implements QEMU hardware part of memory hotplug protocol described at "docs/specs/acpi_mem_hotplug.txt" - handles only memory add notification event for now Signed-off-by: Igor Mammedov <imammedo@redhat.com> --- docs/specs/acpi_mem_hotplug.txt | 38 ++++++++++ hw/acpi/core.c | 144 +++++++++++++++++++++++++++++++++++++++ include/hw/acpi/acpi.h | 24 +++++++ 3 files changed, 206 insertions(+), 0 deletions(-) create mode 100644 docs/specs/acpi_mem_hotplug.txt