diff mbox

[v4,2/8] acpi: add vmcoreinfo device

Message ID 20170714182012.4595-3-marcandre.lureau@redhat.com
State New
Headers show

Commit Message

Marc-André Lureau July 14, 2017, 6:20 p.m. UTC
The VM coreinfo (vmcoreinfo) device is an emulated device which
exposes a 4k memory range to the guest to store various informations
useful to debug the guest OS. (it is greatly inspired by the VMGENID
device implementation)

This is an early-boot alternative to the qemu-ga VMDUMP_INFO event
proposed in "[PATCH 00/21] WIP: dump: add kaslr support".

A proof-of-concept kernel module:
https://github.com/elmarco/vmgenid-test/blob/master/qemuvmci-test.c

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
---
 include/hw/acpi/aml-build.h        |   1 +
 include/hw/acpi/vmcoreinfo.h       |  37 +++++++
 hw/acpi/aml-build.c                |   2 +
 hw/acpi/vmcoreinfo.c               | 208 +++++++++++++++++++++++++++++++++++++
 hw/i386/acpi-build.c               |  14 +++
 default-configs/arm-softmmu.mak    |   1 +
 default-configs/i386-softmmu.mak   |   1 +
 default-configs/x86_64-softmmu.mak |   1 +
 docs/specs/vmcoreinfo.txt          | 138 ++++++++++++++++++++++++
 hw/acpi/Makefile.objs              |   1 +
 10 files changed, 404 insertions(+)
 create mode 100644 include/hw/acpi/vmcoreinfo.h
 create mode 100644 hw/acpi/vmcoreinfo.c
 create mode 100644 docs/specs/vmcoreinfo.txt

Comments

Michael S. Tsirkin July 14, 2017, 7:26 p.m. UTC | #1
On Fri, Jul 14, 2017 at 08:20:05PM +0200, Marc-André Lureau wrote:
> The VM coreinfo (vmcoreinfo) device is an emulated device which
> exposes a 4k memory range to the guest to store various informations
> useful to debug the guest OS. (it is greatly inspired by the VMGENID
> device implementation)
> 
> This is an early-boot alternative to the qemu-ga VMDUMP_INFO event
> proposed in "[PATCH 00/21] WIP: dump: add kaslr support".
> 
> A proof-of-concept kernel module:
> https://github.com/elmarco/vmgenid-test/blob/master/qemuvmci-test.c
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> Reviewed-by: Laszlo Ersek <lersek@redhat.com>

It worries me that the format of this seems completely undefined
except in the patchset cover letter.
I don't think we should merge this before it is.

> ---
>  include/hw/acpi/aml-build.h        |   1 +
>  include/hw/acpi/vmcoreinfo.h       |  37 +++++++
>  hw/acpi/aml-build.c                |   2 +
>  hw/acpi/vmcoreinfo.c               | 208 +++++++++++++++++++++++++++++++++++++
>  hw/i386/acpi-build.c               |  14 +++
>  default-configs/arm-softmmu.mak    |   1 +
>  default-configs/i386-softmmu.mak   |   1 +
>  default-configs/x86_64-softmmu.mak |   1 +
>  docs/specs/vmcoreinfo.txt          | 138 ++++++++++++++++++++++++
>  hw/acpi/Makefile.objs              |   1 +
>  10 files changed, 404 insertions(+)
>  create mode 100644 include/hw/acpi/vmcoreinfo.h
>  create mode 100644 hw/acpi/vmcoreinfo.c
>  create mode 100644 docs/specs/vmcoreinfo.txt
> 
> diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
> index 88d0738d76..cf781bcd34 100644
> --- a/include/hw/acpi/aml-build.h
> +++ b/include/hw/acpi/aml-build.h
> @@ -211,6 +211,7 @@ struct AcpiBuildTables {
>      GArray *rsdp;
>      GArray *tcpalog;
>      GArray *vmgenid;
> +    GArray *vmcoreinfo;
>      BIOSLinker *linker;
>  } AcpiBuildTables;
>  
> diff --git a/include/hw/acpi/vmcoreinfo.h b/include/hw/acpi/vmcoreinfo.h
> new file mode 100644
> index 0000000000..6a73bcd1b2
> --- /dev/null
> +++ b/include/hw/acpi/vmcoreinfo.h
> @@ -0,0 +1,37 @@
> +#ifndef ACPI_VMCOREINFO_H
> +#define ACPI_VMCOREINFO_H
> +
> +#include "hw/acpi/bios-linker-loader.h"
> +#include "hw/qdev.h"
> +
> +#define VMCOREINFO_DEVICE           "vmcoreinfo"
> +#define VMCOREINFO_FW_CFG_FILE      "etc/vmcoreinfo"
> +#define VMCOREINFO_ADDR_FW_CFG_FILE "etc/vmcoreinfo-addr"
> +
> +/* Occupy a page of memory */
> +#define VMCOREINFO_FW_CFG_SIZE      4096
> +
> +/* allow space for OVMF SDT Header Probe Supressor */
> +#define VMCOREINFO_OFFSET           sizeof(AcpiTableHeader)
> +
> +#define VMCOREINFO(obj) OBJECT_CHECK(VMCoreInfoState, (obj), VMCOREINFO_DEVICE)
> +
> +typedef struct VMCoreInfoState {
> +    DeviceClass parent_obj;
> +    uint8_t vmcoreinfo_addr_le[8];   /* Address of memory region */
> +    bool write_pointer_available;
> +} VMCoreInfoState;
> +
> +/* returns NULL unless there is exactly one device */
> +static inline Object *find_vmcoreinfo_dev(void)
> +{
> +    return object_resolve_path_type("", VMCOREINFO_DEVICE, NULL);
> +}
> +
> +void vmcoreinfo_build_acpi(VMCoreInfoState *vis, GArray *table_data,
> +                           GArray *vmci, BIOSLinker *linker);
> +void vmcoreinfo_add_fw_cfg(VMCoreInfoState *vis, FWCfgState *s, GArray *vmci);
> +bool vmcoreinfo_get(VMCoreInfoState *vis, uint64_t *paddr, uint32_t *size,
> +                    Error **errp);
> +
> +#endif
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index 36a6cc450e..47043ade4a 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -1561,6 +1561,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables)
>      tables->table_data = g_array_new(false, true /* clear */, 1);
>      tables->tcpalog = g_array_new(false, true /* clear */, 1);
>      tables->vmgenid = g_array_new(false, true /* clear */, 1);
> +    tables->vmcoreinfo = g_array_new(false, true /* clear */, 1);
>      tables->linker = bios_linker_loader_init();
>  }
>  
> @@ -1571,6 +1572,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
>      g_array_free(tables->table_data, true);
>      g_array_free(tables->tcpalog, mfre);
>      g_array_free(tables->vmgenid, mfre);
> +    g_array_free(tables->vmcoreinfo, mfre);
>  }
>  
>  /* Build rsdt table */
> diff --git a/hw/acpi/vmcoreinfo.c b/hw/acpi/vmcoreinfo.c
> new file mode 100644
> index 0000000000..0ea41de8d9
> --- /dev/null
> +++ b/hw/acpi/vmcoreinfo.c
> @@ -0,0 +1,208 @@
> +/*
> + *  Virtual Machine coreinfo device
> + *  (based on Virtual Machine Generation ID Device)
> + *
> + *  Copyright (C) 2017 Red Hat, Inc.
> + *  Copyright (C) 2017 Skyport Systems.
> + *
> + *  Authors: Marc-André Lureau <marcandre.lureau@redhat.com>
> + *           Ben Warren <ben@skyportsystems.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +#include "qemu/osdep.h"
> +#include "hw/acpi/acpi.h"
> +#include "hw/acpi/aml-build.h"
> +#include "hw/acpi/vmcoreinfo.h"
> +#include "hw/nvram/fw_cfg.h"
> +#include "sysemu/sysemu.h"
> +#include "qapi/error.h"
> +
> +void vmcoreinfo_build_acpi(VMCoreInfoState *vis, GArray *table_data,
> +                           GArray *vmci, BIOSLinker *linker)
> +{
> +    Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx;
> +    uint32_t vcia_offset;
> +
> +    g_array_set_size(vmci, VMCOREINFO_FW_CFG_SIZE);
> +
> +    /* Put this in a separate SSDT table */
> +    ssdt = init_aml_allocator();
> +
> +    /* Reserve space for header */
> +    acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader));
> +
> +    /* Storage address */
> +    vcia_offset = table_data->len +
> +        build_append_named_dword(ssdt->buf, "VCIA");
> +    scope = aml_scope("\\_SB");
> +    dev = aml_device("VMCI");
> +    aml_append(dev, aml_name_decl("_HID", aml_string("QEMUVMCI")));
> +
> +    /* Simple status method to check that address is linked and non-zero */
> +    method = aml_method("_STA", 0, AML_NOTSERIALIZED);
> +    addr = aml_local(0);
> +    aml_append(method, aml_store(aml_int(0xf), addr));
> +    if_ctx = aml_if(aml_equal(aml_name("VCIA"), aml_int(0)));
> +    aml_append(if_ctx, aml_store(aml_int(0), addr));
> +    aml_append(method, if_ctx);
> +    aml_append(method, aml_return(addr));
> +    aml_append(dev, method);
> +
> +    /* the ADDR method returns two 32-bit words representing the lower and
> +     * upper halves of the physical address of the vmcoreinfo area
> +     */
> +    method = aml_method("ADDR", 0, AML_NOTSERIALIZED);
> +
> +    addr = aml_local(0);
> +    aml_append(method, aml_store(aml_package(2), addr));
> +
> +    aml_append(method, aml_store(aml_add(aml_name("VCIA"),
> +                                         aml_int(VMCOREINFO_OFFSET), NULL),
> +                                 aml_index(addr, aml_int(0))));
> +    aml_append(method, aml_store(aml_int(0), aml_index(addr, aml_int(1))));
> +    aml_append(method, aml_return(addr));
> +
> +    aml_append(dev, method);
> +    aml_append(scope, dev);
> +    aml_append(ssdt, scope);
> +
> +    g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
> +
> +    /* Allocate guest memory */
> +    bios_linker_loader_alloc(linker, VMCOREINFO_FW_CFG_FILE, vmci, 4096,
> +                             false /* page boundary, high memory */);
> +
> +    /* Patch address of vmcoreinfo fw_cfg blob into the ADDR fw_cfg
> +     * blob so QEMU can read the info from there.  The address is
> +     * expected to be < 4GB, but write 64 bits anyway.
> +     * The address that is patched in is offset in order to implement
> +     * the "OVMF SDT Header probe suppressor"
> +     * see docs/specs/vmcoreinfo.txt for more details.
> +     */
> +    bios_linker_loader_write_pointer(linker,
> +        VMCOREINFO_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t),
> +        VMCOREINFO_FW_CFG_FILE, VMCOREINFO_OFFSET);
> +
> +    /* Patch address of vmcoreinfo into the AML so OSPM can retrieve
> +     * and read it.  Note that while we provide storage for 64 bits, only
> +     * the least-signficant 32 get patched into AML.
> +     */
> +    bios_linker_loader_add_pointer(linker,
> +        ACPI_BUILD_TABLE_FILE, vcia_offset, sizeof(uint32_t),
> +        VMCOREINFO_FW_CFG_FILE, 0);
> +
> +    build_header(linker, table_data,
> +        (void *)(table_data->data + table_data->len - ssdt->buf->len),
> +        "SSDT", ssdt->buf->len, 1, NULL, "VMCOREIN");
> +    free_aml_allocator();
> +}
> +
> +void vmcoreinfo_add_fw_cfg(VMCoreInfoState *vis, FWCfgState *s, GArray *vmci)
> +{
> +    /* Create a read-only fw_cfg file for vmcoreinfo allocation */
> +    /* XXX: linker could learn to allocate without backing fw_cfg? */
> +    fw_cfg_add_file(s, VMCOREINFO_FW_CFG_FILE, vmci->data,
> +                    VMCOREINFO_FW_CFG_SIZE);
> +    /* Create a read-write fw_cfg file for Address */
> +    fw_cfg_add_file_callback(s, VMCOREINFO_ADDR_FW_CFG_FILE, NULL, NULL,
> +                             vis->vmcoreinfo_addr_le,
> +                             ARRAY_SIZE(vis->vmcoreinfo_addr_le), false);
> +}
> +
> +bool vmcoreinfo_get(VMCoreInfoState *vis,
> +                    uint64_t *paddr, uint32_t *size,
> +                    Error **errp)
> +{
> +    uint32_t vmcoreinfo_addr;
> +    uint32_t version;
> +
> +    assert(vis);
> +    assert(paddr);
> +    assert(size);
> +
> +    memcpy(&vmcoreinfo_addr, vis->vmcoreinfo_addr_le, sizeof(vmcoreinfo_addr));
> +    vmcoreinfo_addr = le32_to_cpu(vmcoreinfo_addr);
> +    if (!vmcoreinfo_addr) {
> +        error_setg(errp, "BIOS has not yet written the address of %s",
> +                   VMCOREINFO_DEVICE);
> +        return false;
> +    }
> +
> +    cpu_physical_memory_read(vmcoreinfo_addr, &version, sizeof(version));
> +    if (version != 0) {
> +        error_setg(errp, "Unknown %s memory version", VMCOREINFO_DEVICE);
> +        return false;
> +    }
> +
> +    cpu_physical_memory_read(vmcoreinfo_addr + 4, paddr, sizeof(*paddr));
> +    *paddr = le64_to_cpu(*paddr);
> +    cpu_physical_memory_read(vmcoreinfo_addr + 12, size, sizeof(*size));
> +    *size = le32_to_cpu(*size);
> +
> +    return true;
> +}
> +
> +static const VMStateDescription vmstate_vmcoreinfo = {
> +    .name = "vmcoreinfo",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT8_ARRAY(vmcoreinfo_addr_le, VMCoreInfoState, sizeof(uint64_t)),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
> +static void vmcoreinfo_handle_reset(void *opaque)
> +{
> +    VMCoreInfoState *vis = VMCOREINFO(opaque);
> +
> +    /* Clear the guest-allocated address when the VM resets */
> +    memset(vis->vmcoreinfo_addr_le, 0, ARRAY_SIZE(vis->vmcoreinfo_addr_le));
> +}
> +
> +static void vmcoreinfo_realize(DeviceState *dev, Error **errp)
> +{
> +    if (!bios_linker_loader_can_write_pointer()) {
> +        error_setg(errp, "%s requires DMA write support in fw_cfg, "
> +                   "which this machine type does not provide",
> +                   VMCOREINFO_DEVICE);
> +        return;
> +    }
> +
> +    /* Given that this function is executing, there is at least one VMCOREINFO
> +     * device. Check if there are several.
> +     */
> +    if (!find_vmcoreinfo_dev()) {
> +        error_setg(errp, "at most one %s device is permitted",
> +                   VMCOREINFO_DEVICE);
> +        return;
> +    }
> +
> +    qemu_register_reset(vmcoreinfo_handle_reset, dev);
> +}
> +
> +static void vmcoreinfo_device_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +
> +    dc->vmsd = &vmstate_vmcoreinfo;
> +    dc->realize = vmcoreinfo_realize;
> +    dc->hotpluggable = false;
> +}
> +
> +static const TypeInfo vmcoreinfo_device_info = {
> +    .name          = VMCOREINFO_DEVICE,
> +    .parent        = TYPE_DEVICE,
> +    .instance_size = sizeof(VMCoreInfoState),
> +    .class_init    = vmcoreinfo_device_class_init,
> +};
> +
> +static void vmcoreinfo_register_types(void)
> +{
> +    type_register_static(&vmcoreinfo_device_info);
> +}
> +
> +type_init(vmcoreinfo_register_types)
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 6b7bade183..7ac529680e 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -43,6 +43,7 @@
>  #include "sysemu/tpm.h"
>  #include "hw/acpi/tpm.h"
>  #include "hw/acpi/vmgenid.h"
> +#include "hw/acpi/vmcoreinfo.h"
>  #include "sysemu/tpm_backend.h"
>  #include "hw/timer/mc146818rtc_regs.h"
>  #include "sysemu/numa.h"
> @@ -2631,6 +2632,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
>      GArray *tables_blob = tables->table_data;
>      AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL };
>      Object *vmgenid_dev;
> +    Object *vmcoreinfo_dev;
>  
>      acpi_get_pm_info(&pm);
>      acpi_get_misc_info(&misc);
> @@ -2680,6 +2682,12 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
>          vmgenid_build_acpi(VMGENID(vmgenid_dev), tables_blob,
>                             tables->vmgenid, tables->linker);
>      }
> +    vmcoreinfo_dev = find_vmcoreinfo_dev();
> +    if (vmcoreinfo_dev) {
> +        acpi_add_table(table_offsets, tables_blob);
> +        vmcoreinfo_build_acpi(VMCOREINFO(vmcoreinfo_dev), tables_blob,
> +                              tables->vmcoreinfo, tables->linker);
> +    }
>  
>      if (misc.has_hpet) {
>          acpi_add_table(table_offsets, tables_blob);
> @@ -2856,6 +2864,7 @@ void acpi_setup(void)
>      AcpiBuildTables tables;
>      AcpiBuildState *build_state;
>      Object *vmgenid_dev;
> +    Object *vmcoreinfo_dev;
>  
>      if (!pcms->fw_cfg) {
>          ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n");
> @@ -2897,6 +2906,11 @@ void acpi_setup(void)
>          vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg,
>                             tables.vmgenid);
>      }
> +    vmcoreinfo_dev = find_vmcoreinfo_dev();
> +    if (vmcoreinfo_dev) {
> +        vmcoreinfo_add_fw_cfg(VMCOREINFO(vmcoreinfo_dev), pcms->fw_cfg,
> +                              tables.vmcoreinfo);
> +    }
>  
>      if (!pcmc->rsdp_in_ram) {
>          /*
> diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
> index 93e995d318..320dd3680d 100644
> --- a/default-configs/arm-softmmu.mak
> +++ b/default-configs/arm-softmmu.mak
> @@ -120,6 +120,7 @@ CONFIG_XIO3130=y
>  CONFIG_IOH3420=y
>  CONFIG_I82801B11=y
>  CONFIG_ACPI=y
> +CONFIG_ACPI_VMCOREINFO=y
>  CONFIG_SMBIOS=y
>  CONFIG_ASPEED_SOC=y
>  CONFIG_GPIO_KEY=y
> diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
> index d2ab2f6655..df68628895 100644
> --- a/default-configs/i386-softmmu.mak
> +++ b/default-configs/i386-softmmu.mak
> @@ -59,3 +59,4 @@ CONFIG_SMBIOS=y
>  CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
>  CONFIG_PXB=y
>  CONFIG_ACPI_VMGENID=y
> +CONFIG_ACPI_VMCOREINFO=y
> diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak
> index 9bde2f1c4b..e39ad5a680 100644
> --- a/default-configs/x86_64-softmmu.mak
> +++ b/default-configs/x86_64-softmmu.mak
> @@ -59,3 +59,4 @@ CONFIG_SMBIOS=y
>  CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
>  CONFIG_PXB=y
>  CONFIG_ACPI_VMGENID=y
> +CONFIG_ACPI_VMCOREINFO=y
> diff --git a/docs/specs/vmcoreinfo.txt b/docs/specs/vmcoreinfo.txt
> new file mode 100644
> index 0000000000..36d5a39ab1
> --- /dev/null
> +++ b/docs/specs/vmcoreinfo.txt
> @@ -0,0 +1,138 @@
> +VIRTUAL MACHINE COREINFO DEVICE
> +===============================
> +
> +Copyright (C) 2017 Red Hat, Inc.
> +
> +This work is licensed under the terms of the GNU GPL, version 2 or later.
> +See the COPYING file in the top-level directory.
> +
> +===
> +
> +The VM coreinfo (vmcoreinfo) device is an emulated device which
> +exposes a 4k memory range to the guest to store various informations
> +useful to debug the guest OS.
> +
> +QEMU Implementation
> +-------------------
> +
> +The vmcoreinfo device is put in its own ACPI descriptor table, in a
> +Secondary System Description Table, or SSDT.
> +
> +The following is a dump of the contents from a running system:
> +
> +# iasl -p ./SSDT -d /sys/firmware/acpi/tables/SSDT
> +/*
> + * Intel ACPI Component Architecture
> + * AML/ASL+ Disassembler version 20160831-64
> + * Copyright (c) 2000 - 2016 Intel Corporation
> + *
> + * Disassembling to symbolic ASL+ operators
> + *
> + * Disassembly of /sys/firmware/acpi/tables/SSDT, Mon Apr 24 15:59:53 2017
> + *
> + * Original Table Header:
> + *     Signature        "SSDT"
> + *     Length           0x00000086 (134)
> + *     Revision         0x01
> + *     Checksum         0x5C
> + *     OEM ID           "BOCHS "
> + *     OEM Table ID     "VMCOREIN"
> + *     OEM Revision     0x00000001 (1)
> + *     Compiler ID      "BXPC"
> + *     Compiler Version 0x00000001 (1)
> + */
> +DefinitionBlock ("", "SSDT", 1, "BOCHS ", "VMCOREIN", 0x00000001)
> +{
> +    Name (VCIA, 0x3FFFF000)
> +    Scope (\_SB)
> +    {
> +        Device (VMCI)
> +        {
> +            Name (_HID, "QEMUVMCI")  // _HID: Hardware ID
> +            Method (_STA, 0, NotSerialized)  // _STA: Status
> +            {
> +                Local0 = 0x0F
> +                If (VCIA == Zero)
> +                {
> +                    Local0 = Zero
> +                }
> +
> +                Return (Local0)
> +            }
> +
> +            Method (ADDR, 0, NotSerialized)
> +            {
> +                Local0 = Package (0x02) {}
> +                Local0 [Zero] = (VCIA + 0x24)
> +                Local0 [One] = Zero
> +                Return (Local0)
> +            }
> +        }
> +    }
> +}
> +
> +
> +Design Details:
> +---------------
> +
> +QEMU must be able to read the contents of the device memory,
> +specifically when starting a memory dump.  In order to do this, QEMU
> +must know the address that has been allocated.
> +
> +The mechanism chosen for this memory sharing is writeable fw_cfg blobs.
> +These are data object that are visible to both QEMU and guests, and are
> +addressable as sequential files.
> +
> +More information about fw_cfg can be found in "docs/specs/fw_cfg.txt"
> +
> +Two fw_cfg blobs are used in this case:
> +
> +/etc/vmcoreinfo      - used to allocate memory range, read-only to the guest
> +/etc/vmcoreinfo-addr - contains the address of the allocated range
> +                     - writeable by the guest
> +
> +
> +QEMU sends the following commands to the guest at startup:
> +
> +1. Allocate memory for vmcoreinfo fw_cfg blob.
> +2. Write the address of vmcoreinfo into the SSDT (VCIA ACPI variable as
> +   shown above in the iasl dump).  Note that this change is not propagated
> +   back to QEMU.
> +3. Write the address of vmcoreinfo back to QEMU's copy of vmcoreinfo-addr
> +   via the fw_cfg DMA interface.
> +
> +After step 3, QEMU is able to read the contents of vmcoreinfo.
> +
> +The value of VCIA is persisted via the VMState mechanism.
> +
> +
> +Storage Format:
> +---------------
> +
> +The content is expected to use little-endian format.
> +
> +In order to implement an OVMF "SDT Header Probe Suppressor", the contents of
> +the vmcoreinfo blob has 36 bytes of padding:
> +
> ++-----------------------------------+
> +| SSDT with OEM Table ID = VMCOREIN |
> ++-----------------------------------+
> +| ...                               |       TOP OF PAGE
> +| VCIA dword object ----------------|-----> +---------------------------+
> +| ...                               |       | fw-allocated array for    |
> +| _STA method referring to VCIA     |       | "etc/vmcoreinfo"          |
> +| ...                               |       +---------------------------+
> +| ADDR method referring to VCIA     |       |  0: OVMF SDT Header probe |
> +| ...                               |       |     suppressor            |
> ++-----------------------------------+       | 36: uint32 version field  |
> +                                            | 40: info contents         |
> +                                            |     ....                  |
> +                                            +---------------------------+
> +                                            END OF PAGE
> +
> +Version 0 content:
> +
> + uint64 paddr:
> +  Physical address of the Linux vmcoreinfo ELF note.
> + uint32 size:
> +  Size of the vmcoreinfo ELF note.
> diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
> index 11c35bcb44..9623078f95 100644
> --- a/hw/acpi/Makefile.objs
> +++ b/hw/acpi/Makefile.objs
> @@ -6,6 +6,7 @@ common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o
>  common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
>  common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
>  common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
> +common-obj-$(CONFIG_ACPI_VMCOREINFO) += vmcoreinfo.o
>  common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
>  
>  common-obj-y += acpi_interface.o
> -- 
> 2.13.1.395.gf7b71de06
Laszlo Ersek July 14, 2017, 8:04 p.m. UTC | #2
On 07/14/17 21:26, Michael S. Tsirkin wrote:
> On Fri, Jul 14, 2017 at 08:20:05PM +0200, Marc-André Lureau wrote:
>> The VM coreinfo (vmcoreinfo) device is an emulated device which
>> exposes a 4k memory range to the guest to store various informations
>> useful to debug the guest OS. (it is greatly inspired by the VMGENID
>> device implementation)
>>
>> This is an early-boot alternative to the qemu-ga VMDUMP_INFO event
>> proposed in "[PATCH 00/21] WIP: dump: add kaslr support".
>>
>> A proof-of-concept kernel module:
>> https://github.com/elmarco/vmgenid-test/blob/master/qemuvmci-test.c
>>
>> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
>> Reviewed-by: Laszlo Ersek <lersek@redhat.com>
> 
> It worries me that the format of this seems completely undefined
> except in the patchset cover letter.
> I don't think we should merge this before it is.

I'm not sure what you mean, this patch adds "docs/specs/vmcoreinfo.txt".
That file is the first level contract between the guest firmware
(generally, via the linker/loader), the guest kernel driver
(specifically), and QEMU (also specifically).

The second level contract is the guest kernel's vmcoreinfo ELF note
(which is pointed-to by the first level contract). The layout of that is
specified elsewhere indeed (I don't think it belongs here).

We've taken care not to trust anything coming from the guest kernel.

Can you clarify please?

Thanks
Laszlo

> 
>> ---
>>  include/hw/acpi/aml-build.h        |   1 +
>>  include/hw/acpi/vmcoreinfo.h       |  37 +++++++
>>  hw/acpi/aml-build.c                |   2 +
>>  hw/acpi/vmcoreinfo.c               | 208 +++++++++++++++++++++++++++++++++++++
>>  hw/i386/acpi-build.c               |  14 +++
>>  default-configs/arm-softmmu.mak    |   1 +
>>  default-configs/i386-softmmu.mak   |   1 +
>>  default-configs/x86_64-softmmu.mak |   1 +
>>  docs/specs/vmcoreinfo.txt          | 138 ++++++++++++++++++++++++
>>  hw/acpi/Makefile.objs              |   1 +
>>  10 files changed, 404 insertions(+)
>>  create mode 100644 include/hw/acpi/vmcoreinfo.h
>>  create mode 100644 hw/acpi/vmcoreinfo.c
>>  create mode 100644 docs/specs/vmcoreinfo.txt
>>
>> diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
>> index 88d0738d76..cf781bcd34 100644
>> --- a/include/hw/acpi/aml-build.h
>> +++ b/include/hw/acpi/aml-build.h
>> @@ -211,6 +211,7 @@ struct AcpiBuildTables {
>>      GArray *rsdp;
>>      GArray *tcpalog;
>>      GArray *vmgenid;
>> +    GArray *vmcoreinfo;
>>      BIOSLinker *linker;
>>  } AcpiBuildTables;
>>  
>> diff --git a/include/hw/acpi/vmcoreinfo.h b/include/hw/acpi/vmcoreinfo.h
>> new file mode 100644
>> index 0000000000..6a73bcd1b2
>> --- /dev/null
>> +++ b/include/hw/acpi/vmcoreinfo.h
>> @@ -0,0 +1,37 @@
>> +#ifndef ACPI_VMCOREINFO_H
>> +#define ACPI_VMCOREINFO_H
>> +
>> +#include "hw/acpi/bios-linker-loader.h"
>> +#include "hw/qdev.h"
>> +
>> +#define VMCOREINFO_DEVICE           "vmcoreinfo"
>> +#define VMCOREINFO_FW_CFG_FILE      "etc/vmcoreinfo"
>> +#define VMCOREINFO_ADDR_FW_CFG_FILE "etc/vmcoreinfo-addr"
>> +
>> +/* Occupy a page of memory */
>> +#define VMCOREINFO_FW_CFG_SIZE      4096
>> +
>> +/* allow space for OVMF SDT Header Probe Supressor */
>> +#define VMCOREINFO_OFFSET           sizeof(AcpiTableHeader)
>> +
>> +#define VMCOREINFO(obj) OBJECT_CHECK(VMCoreInfoState, (obj), VMCOREINFO_DEVICE)
>> +
>> +typedef struct VMCoreInfoState {
>> +    DeviceClass parent_obj;
>> +    uint8_t vmcoreinfo_addr_le[8];   /* Address of memory region */
>> +    bool write_pointer_available;
>> +} VMCoreInfoState;
>> +
>> +/* returns NULL unless there is exactly one device */
>> +static inline Object *find_vmcoreinfo_dev(void)
>> +{
>> +    return object_resolve_path_type("", VMCOREINFO_DEVICE, NULL);
>> +}
>> +
>> +void vmcoreinfo_build_acpi(VMCoreInfoState *vis, GArray *table_data,
>> +                           GArray *vmci, BIOSLinker *linker);
>> +void vmcoreinfo_add_fw_cfg(VMCoreInfoState *vis, FWCfgState *s, GArray *vmci);
>> +bool vmcoreinfo_get(VMCoreInfoState *vis, uint64_t *paddr, uint32_t *size,
>> +                    Error **errp);
>> +
>> +#endif
>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>> index 36a6cc450e..47043ade4a 100644
>> --- a/hw/acpi/aml-build.c
>> +++ b/hw/acpi/aml-build.c
>> @@ -1561,6 +1561,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables)
>>      tables->table_data = g_array_new(false, true /* clear */, 1);
>>      tables->tcpalog = g_array_new(false, true /* clear */, 1);
>>      tables->vmgenid = g_array_new(false, true /* clear */, 1);
>> +    tables->vmcoreinfo = g_array_new(false, true /* clear */, 1);
>>      tables->linker = bios_linker_loader_init();
>>  }
>>  
>> @@ -1571,6 +1572,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
>>      g_array_free(tables->table_data, true);
>>      g_array_free(tables->tcpalog, mfre);
>>      g_array_free(tables->vmgenid, mfre);
>> +    g_array_free(tables->vmcoreinfo, mfre);
>>  }
>>  
>>  /* Build rsdt table */
>> diff --git a/hw/acpi/vmcoreinfo.c b/hw/acpi/vmcoreinfo.c
>> new file mode 100644
>> index 0000000000..0ea41de8d9
>> --- /dev/null
>> +++ b/hw/acpi/vmcoreinfo.c
>> @@ -0,0 +1,208 @@
>> +/*
>> + *  Virtual Machine coreinfo device
>> + *  (based on Virtual Machine Generation ID Device)
>> + *
>> + *  Copyright (C) 2017 Red Hat, Inc.
>> + *  Copyright (C) 2017 Skyport Systems.
>> + *
>> + *  Authors: Marc-André Lureau <marcandre.lureau@redhat.com>
>> + *           Ben Warren <ben@skyportsystems.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +#include "qemu/osdep.h"
>> +#include "hw/acpi/acpi.h"
>> +#include "hw/acpi/aml-build.h"
>> +#include "hw/acpi/vmcoreinfo.h"
>> +#include "hw/nvram/fw_cfg.h"
>> +#include "sysemu/sysemu.h"
>> +#include "qapi/error.h"
>> +
>> +void vmcoreinfo_build_acpi(VMCoreInfoState *vis, GArray *table_data,
>> +                           GArray *vmci, BIOSLinker *linker)
>> +{
>> +    Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx;
>> +    uint32_t vcia_offset;
>> +
>> +    g_array_set_size(vmci, VMCOREINFO_FW_CFG_SIZE);
>> +
>> +    /* Put this in a separate SSDT table */
>> +    ssdt = init_aml_allocator();
>> +
>> +    /* Reserve space for header */
>> +    acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader));
>> +
>> +    /* Storage address */
>> +    vcia_offset = table_data->len +
>> +        build_append_named_dword(ssdt->buf, "VCIA");
>> +    scope = aml_scope("\\_SB");
>> +    dev = aml_device("VMCI");
>> +    aml_append(dev, aml_name_decl("_HID", aml_string("QEMUVMCI")));
>> +
>> +    /* Simple status method to check that address is linked and non-zero */
>> +    method = aml_method("_STA", 0, AML_NOTSERIALIZED);
>> +    addr = aml_local(0);
>> +    aml_append(method, aml_store(aml_int(0xf), addr));
>> +    if_ctx = aml_if(aml_equal(aml_name("VCIA"), aml_int(0)));
>> +    aml_append(if_ctx, aml_store(aml_int(0), addr));
>> +    aml_append(method, if_ctx);
>> +    aml_append(method, aml_return(addr));
>> +    aml_append(dev, method);
>> +
>> +    /* the ADDR method returns two 32-bit words representing the lower and
>> +     * upper halves of the physical address of the vmcoreinfo area
>> +     */
>> +    method = aml_method("ADDR", 0, AML_NOTSERIALIZED);
>> +
>> +    addr = aml_local(0);
>> +    aml_append(method, aml_store(aml_package(2), addr));
>> +
>> +    aml_append(method, aml_store(aml_add(aml_name("VCIA"),
>> +                                         aml_int(VMCOREINFO_OFFSET), NULL),
>> +                                 aml_index(addr, aml_int(0))));
>> +    aml_append(method, aml_store(aml_int(0), aml_index(addr, aml_int(1))));
>> +    aml_append(method, aml_return(addr));
>> +
>> +    aml_append(dev, method);
>> +    aml_append(scope, dev);
>> +    aml_append(ssdt, scope);
>> +
>> +    g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
>> +
>> +    /* Allocate guest memory */
>> +    bios_linker_loader_alloc(linker, VMCOREINFO_FW_CFG_FILE, vmci, 4096,
>> +                             false /* page boundary, high memory */);
>> +
>> +    /* Patch address of vmcoreinfo fw_cfg blob into the ADDR fw_cfg
>> +     * blob so QEMU can read the info from there.  The address is
>> +     * expected to be < 4GB, but write 64 bits anyway.
>> +     * The address that is patched in is offset in order to implement
>> +     * the "OVMF SDT Header probe suppressor"
>> +     * see docs/specs/vmcoreinfo.txt for more details.
>> +     */
>> +    bios_linker_loader_write_pointer(linker,
>> +        VMCOREINFO_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t),
>> +        VMCOREINFO_FW_CFG_FILE, VMCOREINFO_OFFSET);
>> +
>> +    /* Patch address of vmcoreinfo into the AML so OSPM can retrieve
>> +     * and read it.  Note that while we provide storage for 64 bits, only
>> +     * the least-signficant 32 get patched into AML.
>> +     */
>> +    bios_linker_loader_add_pointer(linker,
>> +        ACPI_BUILD_TABLE_FILE, vcia_offset, sizeof(uint32_t),
>> +        VMCOREINFO_FW_CFG_FILE, 0);
>> +
>> +    build_header(linker, table_data,
>> +        (void *)(table_data->data + table_data->len - ssdt->buf->len),
>> +        "SSDT", ssdt->buf->len, 1, NULL, "VMCOREIN");
>> +    free_aml_allocator();
>> +}
>> +
>> +void vmcoreinfo_add_fw_cfg(VMCoreInfoState *vis, FWCfgState *s, GArray *vmci)
>> +{
>> +    /* Create a read-only fw_cfg file for vmcoreinfo allocation */
>> +    /* XXX: linker could learn to allocate without backing fw_cfg? */
>> +    fw_cfg_add_file(s, VMCOREINFO_FW_CFG_FILE, vmci->data,
>> +                    VMCOREINFO_FW_CFG_SIZE);
>> +    /* Create a read-write fw_cfg file for Address */
>> +    fw_cfg_add_file_callback(s, VMCOREINFO_ADDR_FW_CFG_FILE, NULL, NULL,
>> +                             vis->vmcoreinfo_addr_le,
>> +                             ARRAY_SIZE(vis->vmcoreinfo_addr_le), false);
>> +}
>> +
>> +bool vmcoreinfo_get(VMCoreInfoState *vis,
>> +                    uint64_t *paddr, uint32_t *size,
>> +                    Error **errp)
>> +{
>> +    uint32_t vmcoreinfo_addr;
>> +    uint32_t version;
>> +
>> +    assert(vis);
>> +    assert(paddr);
>> +    assert(size);
>> +
>> +    memcpy(&vmcoreinfo_addr, vis->vmcoreinfo_addr_le, sizeof(vmcoreinfo_addr));
>> +    vmcoreinfo_addr = le32_to_cpu(vmcoreinfo_addr);
>> +    if (!vmcoreinfo_addr) {
>> +        error_setg(errp, "BIOS has not yet written the address of %s",
>> +                   VMCOREINFO_DEVICE);
>> +        return false;
>> +    }
>> +
>> +    cpu_physical_memory_read(vmcoreinfo_addr, &version, sizeof(version));
>> +    if (version != 0) {
>> +        error_setg(errp, "Unknown %s memory version", VMCOREINFO_DEVICE);
>> +        return false;
>> +    }
>> +
>> +    cpu_physical_memory_read(vmcoreinfo_addr + 4, paddr, sizeof(*paddr));
>> +    *paddr = le64_to_cpu(*paddr);
>> +    cpu_physical_memory_read(vmcoreinfo_addr + 12, size, sizeof(*size));
>> +    *size = le32_to_cpu(*size);
>> +
>> +    return true;
>> +}
>> +
>> +static const VMStateDescription vmstate_vmcoreinfo = {
>> +    .name = "vmcoreinfo",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_UINT8_ARRAY(vmcoreinfo_addr_le, VMCoreInfoState, sizeof(uint64_t)),
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
>> +
>> +static void vmcoreinfo_handle_reset(void *opaque)
>> +{
>> +    VMCoreInfoState *vis = VMCOREINFO(opaque);
>> +
>> +    /* Clear the guest-allocated address when the VM resets */
>> +    memset(vis->vmcoreinfo_addr_le, 0, ARRAY_SIZE(vis->vmcoreinfo_addr_le));
>> +}
>> +
>> +static void vmcoreinfo_realize(DeviceState *dev, Error **errp)
>> +{
>> +    if (!bios_linker_loader_can_write_pointer()) {
>> +        error_setg(errp, "%s requires DMA write support in fw_cfg, "
>> +                   "which this machine type does not provide",
>> +                   VMCOREINFO_DEVICE);
>> +        return;
>> +    }
>> +
>> +    /* Given that this function is executing, there is at least one VMCOREINFO
>> +     * device. Check if there are several.
>> +     */
>> +    if (!find_vmcoreinfo_dev()) {
>> +        error_setg(errp, "at most one %s device is permitted",
>> +                   VMCOREINFO_DEVICE);
>> +        return;
>> +    }
>> +
>> +    qemu_register_reset(vmcoreinfo_handle_reset, dev);
>> +}
>> +
>> +static void vmcoreinfo_device_class_init(ObjectClass *klass, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>> +
>> +    dc->vmsd = &vmstate_vmcoreinfo;
>> +    dc->realize = vmcoreinfo_realize;
>> +    dc->hotpluggable = false;
>> +}
>> +
>> +static const TypeInfo vmcoreinfo_device_info = {
>> +    .name          = VMCOREINFO_DEVICE,
>> +    .parent        = TYPE_DEVICE,
>> +    .instance_size = sizeof(VMCoreInfoState),
>> +    .class_init    = vmcoreinfo_device_class_init,
>> +};
>> +
>> +static void vmcoreinfo_register_types(void)
>> +{
>> +    type_register_static(&vmcoreinfo_device_info);
>> +}
>> +
>> +type_init(vmcoreinfo_register_types)
>> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>> index 6b7bade183..7ac529680e 100644
>> --- a/hw/i386/acpi-build.c
>> +++ b/hw/i386/acpi-build.c
>> @@ -43,6 +43,7 @@
>>  #include "sysemu/tpm.h"
>>  #include "hw/acpi/tpm.h"
>>  #include "hw/acpi/vmgenid.h"
>> +#include "hw/acpi/vmcoreinfo.h"
>>  #include "sysemu/tpm_backend.h"
>>  #include "hw/timer/mc146818rtc_regs.h"
>>  #include "sysemu/numa.h"
>> @@ -2631,6 +2632,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
>>      GArray *tables_blob = tables->table_data;
>>      AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL };
>>      Object *vmgenid_dev;
>> +    Object *vmcoreinfo_dev;
>>  
>>      acpi_get_pm_info(&pm);
>>      acpi_get_misc_info(&misc);
>> @@ -2680,6 +2682,12 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
>>          vmgenid_build_acpi(VMGENID(vmgenid_dev), tables_blob,
>>                             tables->vmgenid, tables->linker);
>>      }
>> +    vmcoreinfo_dev = find_vmcoreinfo_dev();
>> +    if (vmcoreinfo_dev) {
>> +        acpi_add_table(table_offsets, tables_blob);
>> +        vmcoreinfo_build_acpi(VMCOREINFO(vmcoreinfo_dev), tables_blob,
>> +                              tables->vmcoreinfo, tables->linker);
>> +    }
>>  
>>      if (misc.has_hpet) {
>>          acpi_add_table(table_offsets, tables_blob);
>> @@ -2856,6 +2864,7 @@ void acpi_setup(void)
>>      AcpiBuildTables tables;
>>      AcpiBuildState *build_state;
>>      Object *vmgenid_dev;
>> +    Object *vmcoreinfo_dev;
>>  
>>      if (!pcms->fw_cfg) {
>>          ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n");
>> @@ -2897,6 +2906,11 @@ void acpi_setup(void)
>>          vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg,
>>                             tables.vmgenid);
>>      }
>> +    vmcoreinfo_dev = find_vmcoreinfo_dev();
>> +    if (vmcoreinfo_dev) {
>> +        vmcoreinfo_add_fw_cfg(VMCOREINFO(vmcoreinfo_dev), pcms->fw_cfg,
>> +                              tables.vmcoreinfo);
>> +    }
>>  
>>      if (!pcmc->rsdp_in_ram) {
>>          /*
>> diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
>> index 93e995d318..320dd3680d 100644
>> --- a/default-configs/arm-softmmu.mak
>> +++ b/default-configs/arm-softmmu.mak
>> @@ -120,6 +120,7 @@ CONFIG_XIO3130=y
>>  CONFIG_IOH3420=y
>>  CONFIG_I82801B11=y
>>  CONFIG_ACPI=y
>> +CONFIG_ACPI_VMCOREINFO=y
>>  CONFIG_SMBIOS=y
>>  CONFIG_ASPEED_SOC=y
>>  CONFIG_GPIO_KEY=y
>> diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
>> index d2ab2f6655..df68628895 100644
>> --- a/default-configs/i386-softmmu.mak
>> +++ b/default-configs/i386-softmmu.mak
>> @@ -59,3 +59,4 @@ CONFIG_SMBIOS=y
>>  CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
>>  CONFIG_PXB=y
>>  CONFIG_ACPI_VMGENID=y
>> +CONFIG_ACPI_VMCOREINFO=y
>> diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak
>> index 9bde2f1c4b..e39ad5a680 100644
>> --- a/default-configs/x86_64-softmmu.mak
>> +++ b/default-configs/x86_64-softmmu.mak
>> @@ -59,3 +59,4 @@ CONFIG_SMBIOS=y
>>  CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
>>  CONFIG_PXB=y
>>  CONFIG_ACPI_VMGENID=y
>> +CONFIG_ACPI_VMCOREINFO=y
>> diff --git a/docs/specs/vmcoreinfo.txt b/docs/specs/vmcoreinfo.txt
>> new file mode 100644
>> index 0000000000..36d5a39ab1
>> --- /dev/null
>> +++ b/docs/specs/vmcoreinfo.txt
>> @@ -0,0 +1,138 @@
>> +VIRTUAL MACHINE COREINFO DEVICE
>> +===============================
>> +
>> +Copyright (C) 2017 Red Hat, Inc.
>> +
>> +This work is licensed under the terms of the GNU GPL, version 2 or later.
>> +See the COPYING file in the top-level directory.
>> +
>> +===
>> +
>> +The VM coreinfo (vmcoreinfo) device is an emulated device which
>> +exposes a 4k memory range to the guest to store various informations
>> +useful to debug the guest OS.
>> +
>> +QEMU Implementation
>> +-------------------
>> +
>> +The vmcoreinfo device is put in its own ACPI descriptor table, in a
>> +Secondary System Description Table, or SSDT.
>> +
>> +The following is a dump of the contents from a running system:
>> +
>> +# iasl -p ./SSDT -d /sys/firmware/acpi/tables/SSDT
>> +/*
>> + * Intel ACPI Component Architecture
>> + * AML/ASL+ Disassembler version 20160831-64
>> + * Copyright (c) 2000 - 2016 Intel Corporation
>> + *
>> + * Disassembling to symbolic ASL+ operators
>> + *
>> + * Disassembly of /sys/firmware/acpi/tables/SSDT, Mon Apr 24 15:59:53 2017
>> + *
>> + * Original Table Header:
>> + *     Signature        "SSDT"
>> + *     Length           0x00000086 (134)
>> + *     Revision         0x01
>> + *     Checksum         0x5C
>> + *     OEM ID           "BOCHS "
>> + *     OEM Table ID     "VMCOREIN"
>> + *     OEM Revision     0x00000001 (1)
>> + *     Compiler ID      "BXPC"
>> + *     Compiler Version 0x00000001 (1)
>> + */
>> +DefinitionBlock ("", "SSDT", 1, "BOCHS ", "VMCOREIN", 0x00000001)
>> +{
>> +    Name (VCIA, 0x3FFFF000)
>> +    Scope (\_SB)
>> +    {
>> +        Device (VMCI)
>> +        {
>> +            Name (_HID, "QEMUVMCI")  // _HID: Hardware ID
>> +            Method (_STA, 0, NotSerialized)  // _STA: Status
>> +            {
>> +                Local0 = 0x0F
>> +                If (VCIA == Zero)
>> +                {
>> +                    Local0 = Zero
>> +                }
>> +
>> +                Return (Local0)
>> +            }
>> +
>> +            Method (ADDR, 0, NotSerialized)
>> +            {
>> +                Local0 = Package (0x02) {}
>> +                Local0 [Zero] = (VCIA + 0x24)
>> +                Local0 [One] = Zero
>> +                Return (Local0)
>> +            }
>> +        }
>> +    }
>> +}
>> +
>> +
>> +Design Details:
>> +---------------
>> +
>> +QEMU must be able to read the contents of the device memory,
>> +specifically when starting a memory dump.  In order to do this, QEMU
>> +must know the address that has been allocated.
>> +
>> +The mechanism chosen for this memory sharing is writeable fw_cfg blobs.
>> +These are data object that are visible to both QEMU and guests, and are
>> +addressable as sequential files.
>> +
>> +More information about fw_cfg can be found in "docs/specs/fw_cfg.txt"
>> +
>> +Two fw_cfg blobs are used in this case:
>> +
>> +/etc/vmcoreinfo      - used to allocate memory range, read-only to the guest
>> +/etc/vmcoreinfo-addr - contains the address of the allocated range
>> +                     - writeable by the guest
>> +
>> +
>> +QEMU sends the following commands to the guest at startup:
>> +
>> +1. Allocate memory for vmcoreinfo fw_cfg blob.
>> +2. Write the address of vmcoreinfo into the SSDT (VCIA ACPI variable as
>> +   shown above in the iasl dump).  Note that this change is not propagated
>> +   back to QEMU.
>> +3. Write the address of vmcoreinfo back to QEMU's copy of vmcoreinfo-addr
>> +   via the fw_cfg DMA interface.
>> +
>> +After step 3, QEMU is able to read the contents of vmcoreinfo.
>> +
>> +The value of VCIA is persisted via the VMState mechanism.
>> +
>> +
>> +Storage Format:
>> +---------------
>> +
>> +The content is expected to use little-endian format.
>> +
>> +In order to implement an OVMF "SDT Header Probe Suppressor", the contents of
>> +the vmcoreinfo blob has 36 bytes of padding:
>> +
>> ++-----------------------------------+
>> +| SSDT with OEM Table ID = VMCOREIN |
>> ++-----------------------------------+
>> +| ...                               |       TOP OF PAGE
>> +| VCIA dword object ----------------|-----> +---------------------------+
>> +| ...                               |       | fw-allocated array for    |
>> +| _STA method referring to VCIA     |       | "etc/vmcoreinfo"          |
>> +| ...                               |       +---------------------------+
>> +| ADDR method referring to VCIA     |       |  0: OVMF SDT Header probe |
>> +| ...                               |       |     suppressor            |
>> ++-----------------------------------+       | 36: uint32 version field  |
>> +                                            | 40: info contents         |
>> +                                            |     ....                  |
>> +                                            +---------------------------+
>> +                                            END OF PAGE
>> +
>> +Version 0 content:
>> +
>> + uint64 paddr:
>> +  Physical address of the Linux vmcoreinfo ELF note.
>> + uint32 size:
>> +  Size of the vmcoreinfo ELF note.
>> diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
>> index 11c35bcb44..9623078f95 100644
>> --- a/hw/acpi/Makefile.objs
>> +++ b/hw/acpi/Makefile.objs
>> @@ -6,6 +6,7 @@ common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o
>>  common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
>>  common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
>>  common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
>> +common-obj-$(CONFIG_ACPI_VMCOREINFO) += vmcoreinfo.o
>>  common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
>>  
>>  common-obj-y += acpi_interface.o
>> -- 
>> 2.13.1.395.gf7b71de06
Michael S. Tsirkin July 14, 2017, 8:17 p.m. UTC | #3
On Fri, Jul 14, 2017 at 10:04:31PM +0200, Laszlo Ersek wrote:
> > It worries me that the format of this seems completely undefined
> > except in the patchset cover letter.
> > I don't think we should merge this before it is.
> 
> I'm not sure what you mean, this patch adds "docs/specs/vmcoreinfo.txt".
> That file is the first level contract between the guest firmware
> (generally, via the linker/loader), the guest kernel driver
> (specifically), and QEMU (also specifically).
> 
> The second level contract is the guest kernel's vmcoreinfo ELF note
> (which is pointed-to by the first level contract). The layout of that is
> specified elsewhere indeed (I don't think it belongs here).
> 
> We've taken care not to trust anything coming from the guest kernel.
> 
> Can you clarify please?
> 
> Thanks
> Laszlo

All there is is this:

+Version 0 content:
+
+ uint64 paddr:
+  Physical address of the Linux vmcoreinfo ELF note.
+ uint32 size:
+  Size of the vmcoreinfo ELF note.

It isn't defined here what is the Linux vmcoreinfo ELF note.
You want a bit more info so people trying to use it know where to start
and what they can get out of it.
Marc-André Lureau July 14, 2017, 10:12 p.m. UTC | #4
Hi

On Fri, Jul 14, 2017 at 10:17 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Fri, Jul 14, 2017 at 10:04:31PM +0200, Laszlo Ersek wrote:
>> > It worries me that the format of this seems completely undefined
>> > except in the patchset cover letter.
>> > I don't think we should merge this before it is.
>>
>> I'm not sure what you mean, this patch adds "docs/specs/vmcoreinfo.txt".
>> That file is the first level contract between the guest firmware
>> (generally, via the linker/loader), the guest kernel driver
>> (specifically), and QEMU (also specifically).
>>
>> The second level contract is the guest kernel's vmcoreinfo ELF note
>> (which is pointed-to by the first level contract). The layout of that is
>> specified elsewhere indeed (I don't think it belongs here).
>>
>> We've taken care not to trust anything coming from the guest kernel.
>>
>> Can you clarify please?
>>
>> Thanks
>> Laszlo
>
> All there is is this:
>
> +Version 0 content:
> +
> + uint64 paddr:
> +  Physical address of the Linux vmcoreinfo ELF note.
> + uint32 size:
> +  Size of the vmcoreinfo ELF note.
>
> It isn't defined here what is the Linux vmcoreinfo ELF note.
> You want a bit more info so people trying to use it know where to start
> and what they can get out of it.

QEMU is not responsible for the content of the ELF note. All there is afaik is:

https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo

The rest you need to dig in the kernel code and git history I think.
Michael S. Tsirkin July 14, 2017, 11:09 p.m. UTC | #5
On Sat, Jul 15, 2017 at 12:12:06AM +0200, Marc-André Lureau wrote:
> Hi
> 
> On Fri, Jul 14, 2017 at 10:17 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > On Fri, Jul 14, 2017 at 10:04:31PM +0200, Laszlo Ersek wrote:
> >> > It worries me that the format of this seems completely undefined
> >> > except in the patchset cover letter.
> >> > I don't think we should merge this before it is.
> >>
> >> I'm not sure what you mean, this patch adds "docs/specs/vmcoreinfo.txt".
> >> That file is the first level contract between the guest firmware
> >> (generally, via the linker/loader), the guest kernel driver
> >> (specifically), and QEMU (also specifically).
> >>
> >> The second level contract is the guest kernel's vmcoreinfo ELF note
> >> (which is pointed-to by the first level contract). The layout of that is
> >> specified elsewhere indeed (I don't think it belongs here).
> >>
> >> We've taken care not to trust anything coming from the guest kernel.
> >>
> >> Can you clarify please?
> >>
> >> Thanks
> >> Laszlo
> >
> > All there is is this:
> >
> > +Version 0 content:
> > +
> > + uint64 paddr:
> > +  Physical address of the Linux vmcoreinfo ELF note.
> > + uint32 size:
> > +  Size of the vmcoreinfo ELF note.
> >
> > It isn't defined here what is the Linux vmcoreinfo ELF note.
> > You want a bit more info so people trying to use it know where to start
> > and what they can get out of it.
> 
> QEMU is not responsible for the content of the ELF note. All there is afaik is:
> 
> https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
> 
> The rest you need to dig in the kernel code and git history I think.

We need to do a good enough job to at least make it possible for people
to make use of it without reading your python code.

Documentation/kdump/kdump.txt has a bit more information.

	All of the necessary information about the system kernel's core image is
	encoded in the ELF format, and stored in a reserved area of memory
	before a crash. The physical address of the start of the ELF header is
	passed to the dump-capture kernel through the elfcorehdr= boot
	parameter. Optionally the size of the ELF header can also be passed
	when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax.


	With the dump-capture kernel, you can access the memory image through
	/proc/vmcore. This exports the dump as an ELF-format file that you can
	write out using file copy commands such as cp or scp. Further, you can
	use analysis tools such as the GNU Debugger (GDB) and the Crash tool to
	debug the dump file. This method ensures that the dump pages are correctly
	ordered.

I know where to find it but most people likely won't be able to.

BTW why does it pass ELF header size? Any idea?


> -- 
> Marc-André Lureau
Marc-André Lureau July 14, 2017, 11:30 p.m. UTC | #6
Hi

On Sat, Jul 15, 2017 at 1:09 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Sat, Jul 15, 2017 at 12:12:06AM +0200, Marc-André Lureau wrote:
> > Hi
> >
> > On Fri, Jul 14, 2017 at 10:17 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > > On Fri, Jul 14, 2017 at 10:04:31PM +0200, Laszlo Ersek wrote:
> > >> > It worries me that the format of this seems completely undefined
> > >> > except in the patchset cover letter.
> > >> > I don't think we should merge this before it is.
> > >>
> > >> I'm not sure what you mean, this patch adds "docs/specs/vmcoreinfo.txt".
> > >> That file is the first level contract between the guest firmware
> > >> (generally, via the linker/loader), the guest kernel driver
> > >> (specifically), and QEMU (also specifically).
> > >>
> > >> The second level contract is the guest kernel's vmcoreinfo ELF note
> > >> (which is pointed-to by the first level contract). The layout of that is
> > >> specified elsewhere indeed (I don't think it belongs here).
> > >>
> > >> We've taken care not to trust anything coming from the guest kernel.
> > >>
> > >> Can you clarify please?
> > >>
> > >> Thanks
> > >> Laszlo
> > >
> > > All there is is this:
> > >
> > > +Version 0 content:
> > > +
> > > + uint64 paddr:
> > > +  Physical address of the Linux vmcoreinfo ELF note.
> > > + uint32 size:
> > > +  Size of the vmcoreinfo ELF note.
> > >
> > > It isn't defined here what is the Linux vmcoreinfo ELF note.
> > > You want a bit more info so people trying to use it know where to start
> > > and what they can get out of it.
> >
> > QEMU is not responsible for the content of the ELF note. All there is afaik is:
> >
> > https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
> >
> > The rest you need to dig in the kernel code and git history I think.
>
> We need to do a good enough job to at least make it possible for people
> to make use of it without reading your python code.
>
> Documentation/kdump/kdump.txt has a bit more information.
>
>         All of the necessary information about the system kernel's core image is
>         encoded in the ELF format, and stored in a reserved area of memory
>         before a crash. The physical address of the start of the ELF header is
>         passed to the dump-capture kernel through the elfcorehdr= boot
>         parameter. Optionally the size of the ELF header can also be passed
>         when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax.
>
>
>         With the dump-capture kernel, you can access the memory image through
>         /proc/vmcore. This exports the dump as an ELF-format file that you can
>         write out using file copy commands such as cp or scp. Further, you can
>         use analysis tools such as the GNU Debugger (GDB) and the Crash tool to
>         debug the dump file. This method ensures that the dump pages are correctly
>         ordered.
>
> I know where to find it but most people likely won't be able to.
>

What do you learn from that regarding vmcoreinfo? That is it being
used by kdump? and that you can use gdb/crash, that we already use to
analyse our dumps.

>
> BTW why does it pass ELF header size? Any idea?


https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d3bf37955d46718ee1a7f1fc69f953d2328ba7c2
Michael S. Tsirkin July 14, 2017, 11:40 p.m. UTC | #7
On Sat, Jul 15, 2017 at 01:30:56AM +0200, Marc-André Lureau wrote:
> Hi
> 
> On Sat, Jul 15, 2017 at 1:09 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Sat, Jul 15, 2017 at 12:12:06AM +0200, Marc-André Lureau wrote:
> > > Hi
> > >
> > > On Fri, Jul 14, 2017 at 10:17 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > On Fri, Jul 14, 2017 at 10:04:31PM +0200, Laszlo Ersek wrote:
> > > >> > It worries me that the format of this seems completely undefined
> > > >> > except in the patchset cover letter.
> > > >> > I don't think we should merge this before it is.
> > > >>
> > > >> I'm not sure what you mean, this patch adds "docs/specs/vmcoreinfo.txt".
> > > >> That file is the first level contract between the guest firmware
> > > >> (generally, via the linker/loader), the guest kernel driver
> > > >> (specifically), and QEMU (also specifically).
> > > >>
> > > >> The second level contract is the guest kernel's vmcoreinfo ELF note
> > > >> (which is pointed-to by the first level contract). The layout of that is
> > > >> specified elsewhere indeed (I don't think it belongs here).
> > > >>
> > > >> We've taken care not to trust anything coming from the guest kernel.
> > > >>
> > > >> Can you clarify please?
> > > >>
> > > >> Thanks
> > > >> Laszlo
> > > >
> > > > All there is is this:
> > > >
> > > > +Version 0 content:
> > > > +
> > > > + uint64 paddr:
> > > > +  Physical address of the Linux vmcoreinfo ELF note.
> > > > + uint32 size:
> > > > +  Size of the vmcoreinfo ELF note.
> > > >
> > > > It isn't defined here what is the Linux vmcoreinfo ELF note.
> > > > You want a bit more info so people trying to use it know where to start
> > > > and what they can get out of it.
> > >
> > > QEMU is not responsible for the content of the ELF note. All there is afaik is:
> > >
> > > https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
> > >
> > > The rest you need to dig in the kernel code and git history I think.
> >
> > We need to do a good enough job to at least make it possible for people
> > to make use of it without reading your python code.
> >
> > Documentation/kdump/kdump.txt has a bit more information.
> >
> >         All of the necessary information about the system kernel's core image is
> >         encoded in the ELF format, and stored in a reserved area of memory
> >         before a crash. The physical address of the start of the ELF header is
> >         passed to the dump-capture kernel through the elfcorehdr= boot
> >         parameter. Optionally the size of the ELF header can also be passed
> >         when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax.
> >
> >
> >         With the dump-capture kernel, you can access the memory image through
> >         /proc/vmcore. This exports the dump as an ELF-format file that you can
> >         write out using file copy commands such as cp or scp. Further, you can
> >         use analysis tools such as the GNU Debugger (GDB) and the Crash tool to
> >         debug the dump file. This method ensures that the dump pages are correctly
> >         ordered.
> >
> > I know where to find it but most people likely won't be able to.
> >
> 
> What do you learn from that regarding vmcoreinfo? That is it being
> used by kdump? and that you can use gdb/crash, that we already use to
> analyse our dumps.

Some things I learn:

That its written by a dump-capture kernel (so you need to set one up
if you want it to work!).

That it's an ELF-format file, that one can use analysis tools such as
the GNU Debugger (GDB) and the Crash tool to debug the dump file.

There's more info scattered in other places.

Why do you get to document it? Because you are the one exposing it
across the hypervisor/vm boundary where it will need to be
understood by people/tools not running within guest.

So "just read the script in qemu source" is not how an interface
should be documented.

> >
> > BTW why does it pass ELF header size? Any idea?
> 
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d3bf37955d46718ee1a7f1fc69f953d2328ba7c2

Right but I mean, ELF header has two options: 32 and 64 bit, that's all.
Marc-André Lureau July 14, 2017, 11:47 p.m. UTC | #8
On Sat, Jul 15, 2017 at 1:40 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Sat, Jul 15, 2017 at 01:30:56AM +0200, Marc-André Lureau wrote:
>> Hi
>>
>> On Sat, Jul 15, 2017 at 1:09 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>> >
>> > On Sat, Jul 15, 2017 at 12:12:06AM +0200, Marc-André Lureau wrote:
>> > > Hi
>> > >
>> > > On Fri, Jul 14, 2017 at 10:17 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>> > > > On Fri, Jul 14, 2017 at 10:04:31PM +0200, Laszlo Ersek wrote:
>> > > >> > It worries me that the format of this seems completely undefined
>> > > >> > except in the patchset cover letter.
>> > > >> > I don't think we should merge this before it is.
>> > > >>
>> > > >> I'm not sure what you mean, this patch adds "docs/specs/vmcoreinfo.txt".
>> > > >> That file is the first level contract between the guest firmware
>> > > >> (generally, via the linker/loader), the guest kernel driver
>> > > >> (specifically), and QEMU (also specifically).
>> > > >>
>> > > >> The second level contract is the guest kernel's vmcoreinfo ELF note
>> > > >> (which is pointed-to by the first level contract). The layout of that is
>> > > >> specified elsewhere indeed (I don't think it belongs here).
>> > > >>
>> > > >> We've taken care not to trust anything coming from the guest kernel.
>> > > >>
>> > > >> Can you clarify please?
>> > > >>
>> > > >> Thanks
>> > > >> Laszlo
>> > > >
>> > > > All there is is this:
>> > > >
>> > > > +Version 0 content:
>> > > > +
>> > > > + uint64 paddr:
>> > > > +  Physical address of the Linux vmcoreinfo ELF note.
>> > > > + uint32 size:
>> > > > +  Size of the vmcoreinfo ELF note.
>> > > >
>> > > > It isn't defined here what is the Linux vmcoreinfo ELF note.
>> > > > You want a bit more info so people trying to use it know where to start
>> > > > and what they can get out of it.
>> > >
>> > > QEMU is not responsible for the content of the ELF note. All there is afaik is:
>> > >
>> > > https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
>> > >
>> > > The rest you need to dig in the kernel code and git history I think.
>> >
>> > We need to do a good enough job to at least make it possible for people
>> > to make use of it without reading your python code.
>> >
>> > Documentation/kdump/kdump.txt has a bit more information.
>> >
>> >         All of the necessary information about the system kernel's core image is
>> >         encoded in the ELF format, and stored in a reserved area of memory
>> >         before a crash. The physical address of the start of the ELF header is
>> >         passed to the dump-capture kernel through the elfcorehdr= boot
>> >         parameter. Optionally the size of the ELF header can also be passed
>> >         when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax.
>> >
>> >
>> >         With the dump-capture kernel, you can access the memory image through
>> >         /proc/vmcore. This exports the dump as an ELF-format file that you can
>> >         write out using file copy commands such as cp or scp. Further, you can
>> >         use analysis tools such as the GNU Debugger (GDB) and the Crash tool to
>> >         debug the dump file. This method ensures that the dump pages are correctly
>> >         ordered.
>> >
>> > I know where to find it but most people likely won't be able to.
>> >
>>
>> What do you learn from that regarding vmcoreinfo? That is it being
>> used by kdump? and that you can use gdb/crash, that we already use to
>> analyse our dumps.
>
> Some things I learn:
>
> That its written by a dump-capture kernel (so you need to set one up
> if you want it to work!).

I don't follow you, you don't need a dump-capture kernel / kdump to
have the vmcoreinfo note.

>
> That it's an ELF-format file, that one can use analysis tools such as
> the GNU Debugger (GDB) and the Crash tool to debug the dump file.

Well, that's what kdump does, but qemu is the one making the dump in
our case. Quite irrelevant for us.

>
> There's more info scattered in other places.
>
> Why do you get to document it? Because you are the one exposing it
> across the hypervisor/vm boundary where it will need to be
> understood by people/tools not running within guest.
>
> So "just read the script in qemu source" is not how an interface
> should be documented.

I don't understand the issue, it's a kernel ELF note that qemu passes
for dump/crash tools in the dump headers/sections.

>
>> >
>> > BTW why does it pass ELF header size? Any idea?
>>
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d3bf37955d46718ee1a7f1fc69f953d2328ba7c2
>
> Right but I mean, ELF header has two options: 32 and 64 bit, that's all.
>
> --
> MST
Michael S. Tsirkin July 26, 2017, 5:21 p.m. UTC | #9
On Sat, Jul 15, 2017 at 01:47:50AM +0200, Marc-André Lureau wrote:
> >
> > There's more info scattered in other places.
> >
> > Why do you get to document it? Because you are the one exposing it
> > across the hypervisor/vm boundary where it will need to be
> > understood by people/tools not running within guest.
> >
> > So "just read the script in qemu source" is not how an interface
> > should be documented.
> 
> I don't understand the issue, it's a kernel ELF note that qemu passes
> for dump/crash tools in the dump headers/sections.

The way it looks to me, this patchset is exposing an internal kernel
detail and making it part of ABI maybe it already is, my point was 1.
should we get a confirmation from upstream it's not going to change? 2.
if it's ABI let's document what do we expect to be there.

But again since there's not a whole lot of documentation here
that you provided, I might be misunderstanding completely.


> >
> >> >
> >> > BTW why does it pass ELF header size? Any idea?
> >>
> >>
> >> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d3bf37955d46718ee1a7f1fc69f953d2328ba7c2
> >
> > Right but I mean, ELF header has two options: 32 and 64 bit, that's all.
> >
> > --
> > MST
> 
> 
> 
> -- 
> Marc-André Lureau
Marc-André Lureau July 28, 2017, 2:52 p.m. UTC | #10
Hi Dave

On Wed, Jul 26, 2017 at 10:21 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Sat, Jul 15, 2017 at 01:47:50AM +0200, Marc-André Lureau wrote:
>> >
>> > There's more info scattered in other places.
>> >
>> > Why do you get to document it? Because you are the one exposing it
>> > across the hypervisor/vm boundary where it will need to be
>> > understood by people/tools not running within guest.
>> >
>> > So "just read the script in qemu source" is not how an interface
>> > should be documented.
>>
>> I don't understand the issue, it's a kernel ELF note that qemu passes
>> for dump/crash tools in the dump headers/sections.
>
> The way it looks to me, this patchset is exposing an internal kernel
> detail and making it part of ABI maybe it already is, my point was 1.
> should we get a confirmation from upstream it's not going to change? 2.
> if it's ABI let's document what do we expect to be there.


Could you help explain the expectations and stability guarantees of
vmcoreinfo ELF note ?

I am a bit stuck here, after all, vmcoreinfo is mostly used by crash
so I thought you could help.

The only thing qemu does with it is try to get NUMBER(phys_base)=
field to update the phys_base used in the various dump headers. (this
could be dropped, and qemu ignoring the note content, if the debug
tools take vmcoreinfo values  with higher priority than other header
fields)

> But again since there's not a whole lot of documentation here
> that you provided, I might be misunderstanding completely.

Because there isn't much available in the kernel either, except
Documentation/ABI/testing/sysfs-kernel-vmcoreinfo.
Laszlo Ersek July 28, 2017, 3:55 p.m. UTC | #11
On 07/28/17 16:52, Marc-André Lureau wrote:
> Hi Dave
> 
> On Wed, Jul 26, 2017 at 10:21 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
>> On Sat, Jul 15, 2017 at 01:47:50AM +0200, Marc-André Lureau wrote:
>>>>
>>>> There's more info scattered in other places.
>>>>
>>>> Why do you get to document it? Because you are the one exposing it
>>>> across the hypervisor/vm boundary where it will need to be
>>>> understood by people/tools not running within guest.
>>>>
>>>> So "just read the script in qemu source" is not how an interface
>>>> should be documented.
>>>
>>> I don't understand the issue, it's a kernel ELF note that qemu passes
>>> for dump/crash tools in the dump headers/sections.
>>
>> The way it looks to me, this patchset is exposing an internal kernel
>> detail and making it part of ABI maybe it already is, my point was 1.
>> should we get a confirmation from upstream it's not going to change? 2.
>> if it's ABI let's document what do we expect to be there.
> 
> 
> Could you help explain the expectations and stability guarantees of
> vmcoreinfo ELF note ?
> 
> I am a bit stuck here, after all, vmcoreinfo is mostly used by crash
> so I thought you could help.
> 
> The only thing qemu does with it is try to get NUMBER(phys_base)=
> field to update the phys_base used in the various dump headers. (this
> could be dropped, and qemu ignoring the note content, if the debug
> tools take vmcoreinfo values  with higher priority than other header
> fields)

I agree; if "crash" guarantees that the vmcoreinfo note will override
whatever phys_base value QEMU may have guessed otherwise (from other
places) and written to some dedicated phys_base header fields, then in
QEMU we don't have to propagate phys_base from the vmcoreinfo note to
said other fields -- we can treat the vmcoreinfo note entirely opaquely.

Thanks
Laszlo

> 
>> But again since there's not a whole lot of documentation here
>> that you provided, I might be misunderstanding completely.
> 
> Because there isn't much available in the kernel either, except
> Documentation/ABI/testing/sysfs-kernel-vmcoreinfo.
> 
>
Dave Anderson Aug. 7, 2017, 3:44 p.m. UTC | #12
----- Original Message -----
> Hi Dave
> 
> On Wed, Jul 26, 2017 at 10:21 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > On Sat, Jul 15, 2017 at 01:47:50AM +0200, Marc-André Lureau wrote:
> >> >
> >> > There's more info scattered in other places.
> >> >
> >> > Why do you get to document it? Because you are the one exposing it
> >> > across the hypervisor/vm boundary where it will need to be
> >> > understood by people/tools not running within guest.
> >> >
> >> > So "just read the script in qemu source" is not how an interface
> >> > should be documented.
> >>
> >> I don't understand the issue, it's a kernel ELF note that qemu passes
> >> for dump/crash tools in the dump headers/sections.
> >
> > The way it looks to me, this patchset is exposing an internal kernel
> > detail and making it part of ABI maybe it already is, my point was 1.
> > should we get a confirmation from upstream it's not going to change? 2.
> > if it's ABI let's document what do we expect to be there.
> 
> 
> Could you help explain the expectations and stability guarantees of
> vmcoreinfo ELF note ?
> 
> I am a bit stuck here, after all, vmcoreinfo is mostly used by crash
> so I thought you could help.

Sorry for the delay, I'm just back from vacation...

The vmcoreinfo string data is *primarily* used by the makedumpfile facility,
because it needs to find its way around the dumped kernel memory without having
the the vmlinux file's debuginfo data when it's running in the second kernel.

Since crash utilizes the vmlinux file, it only reads a small handful of 
the vmcoreinfo items for things like phys_base that cannot be gleaned
from the vmlinux debuginfo data.  

Anyway, I'm not sure what you mean by "expectations and stability guarantees"?
It's just a block of ASCII string data of a given size that simply needs
to be transferred to the vmcore.  New strings may be added to it at any
time, but that shouldn't have any impact on what you're doing.

Dave
 
 
> The only thing qemu does with it is try to get NUMBER(phys_base)=
> field to update the phys_base used in the various dump headers. (this
> could be dropped, and qemu ignoring the note content, if the debug
> tools take vmcoreinfo values  with higher priority than other header
> fields)
> 
> > But again since there's not a whole lot of documentation here
> > that you provided, I might be misunderstanding completely.
> 
> Because there isn't much available in the kernel either, except
> Documentation/ABI/testing/sysfs-kernel-vmcoreinfo.
> 
> 
> --
> Marc-André Lureau
>
diff mbox

Patch

diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 88d0738d76..cf781bcd34 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -211,6 +211,7 @@  struct AcpiBuildTables {
     GArray *rsdp;
     GArray *tcpalog;
     GArray *vmgenid;
+    GArray *vmcoreinfo;
     BIOSLinker *linker;
 } AcpiBuildTables;
 
diff --git a/include/hw/acpi/vmcoreinfo.h b/include/hw/acpi/vmcoreinfo.h
new file mode 100644
index 0000000000..6a73bcd1b2
--- /dev/null
+++ b/include/hw/acpi/vmcoreinfo.h
@@ -0,0 +1,37 @@ 
+#ifndef ACPI_VMCOREINFO_H
+#define ACPI_VMCOREINFO_H
+
+#include "hw/acpi/bios-linker-loader.h"
+#include "hw/qdev.h"
+
+#define VMCOREINFO_DEVICE           "vmcoreinfo"
+#define VMCOREINFO_FW_CFG_FILE      "etc/vmcoreinfo"
+#define VMCOREINFO_ADDR_FW_CFG_FILE "etc/vmcoreinfo-addr"
+
+/* Occupy a page of memory */
+#define VMCOREINFO_FW_CFG_SIZE      4096
+
+/* allow space for OVMF SDT Header Probe Supressor */
+#define VMCOREINFO_OFFSET           sizeof(AcpiTableHeader)
+
+#define VMCOREINFO(obj) OBJECT_CHECK(VMCoreInfoState, (obj), VMCOREINFO_DEVICE)
+
+typedef struct VMCoreInfoState {
+    DeviceClass parent_obj;
+    uint8_t vmcoreinfo_addr_le[8];   /* Address of memory region */
+    bool write_pointer_available;
+} VMCoreInfoState;
+
+/* returns NULL unless there is exactly one device */
+static inline Object *find_vmcoreinfo_dev(void)
+{
+    return object_resolve_path_type("", VMCOREINFO_DEVICE, NULL);
+}
+
+void vmcoreinfo_build_acpi(VMCoreInfoState *vis, GArray *table_data,
+                           GArray *vmci, BIOSLinker *linker);
+void vmcoreinfo_add_fw_cfg(VMCoreInfoState *vis, FWCfgState *s, GArray *vmci);
+bool vmcoreinfo_get(VMCoreInfoState *vis, uint64_t *paddr, uint32_t *size,
+                    Error **errp);
+
+#endif
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 36a6cc450e..47043ade4a 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1561,6 +1561,7 @@  void acpi_build_tables_init(AcpiBuildTables *tables)
     tables->table_data = g_array_new(false, true /* clear */, 1);
     tables->tcpalog = g_array_new(false, true /* clear */, 1);
     tables->vmgenid = g_array_new(false, true /* clear */, 1);
+    tables->vmcoreinfo = g_array_new(false, true /* clear */, 1);
     tables->linker = bios_linker_loader_init();
 }
 
@@ -1571,6 +1572,7 @@  void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
     g_array_free(tables->table_data, true);
     g_array_free(tables->tcpalog, mfre);
     g_array_free(tables->vmgenid, mfre);
+    g_array_free(tables->vmcoreinfo, mfre);
 }
 
 /* Build rsdt table */
diff --git a/hw/acpi/vmcoreinfo.c b/hw/acpi/vmcoreinfo.c
new file mode 100644
index 0000000000..0ea41de8d9
--- /dev/null
+++ b/hw/acpi/vmcoreinfo.c
@@ -0,0 +1,208 @@ 
+/*
+ *  Virtual Machine coreinfo device
+ *  (based on Virtual Machine Generation ID Device)
+ *
+ *  Copyright (C) 2017 Red Hat, Inc.
+ *  Copyright (C) 2017 Skyport Systems.
+ *
+ *  Authors: Marc-André Lureau <marcandre.lureau@redhat.com>
+ *           Ben Warren <ben@skyportsystems.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu/osdep.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/vmcoreinfo.h"
+#include "hw/nvram/fw_cfg.h"
+#include "sysemu/sysemu.h"
+#include "qapi/error.h"
+
+void vmcoreinfo_build_acpi(VMCoreInfoState *vis, GArray *table_data,
+                           GArray *vmci, BIOSLinker *linker)
+{
+    Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx;
+    uint32_t vcia_offset;
+
+    g_array_set_size(vmci, VMCOREINFO_FW_CFG_SIZE);
+
+    /* Put this in a separate SSDT table */
+    ssdt = init_aml_allocator();
+
+    /* Reserve space for header */
+    acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader));
+
+    /* Storage address */
+    vcia_offset = table_data->len +
+        build_append_named_dword(ssdt->buf, "VCIA");
+    scope = aml_scope("\\_SB");
+    dev = aml_device("VMCI");
+    aml_append(dev, aml_name_decl("_HID", aml_string("QEMUVMCI")));
+
+    /* Simple status method to check that address is linked and non-zero */
+    method = aml_method("_STA", 0, AML_NOTSERIALIZED);
+    addr = aml_local(0);
+    aml_append(method, aml_store(aml_int(0xf), addr));
+    if_ctx = aml_if(aml_equal(aml_name("VCIA"), aml_int(0)));
+    aml_append(if_ctx, aml_store(aml_int(0), addr));
+    aml_append(method, if_ctx);
+    aml_append(method, aml_return(addr));
+    aml_append(dev, method);
+
+    /* the ADDR method returns two 32-bit words representing the lower and
+     * upper halves of the physical address of the vmcoreinfo area
+     */
+    method = aml_method("ADDR", 0, AML_NOTSERIALIZED);
+
+    addr = aml_local(0);
+    aml_append(method, aml_store(aml_package(2), addr));
+
+    aml_append(method, aml_store(aml_add(aml_name("VCIA"),
+                                         aml_int(VMCOREINFO_OFFSET), NULL),
+                                 aml_index(addr, aml_int(0))));
+    aml_append(method, aml_store(aml_int(0), aml_index(addr, aml_int(1))));
+    aml_append(method, aml_return(addr));
+
+    aml_append(dev, method);
+    aml_append(scope, dev);
+    aml_append(ssdt, scope);
+
+    g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
+
+    /* Allocate guest memory */
+    bios_linker_loader_alloc(linker, VMCOREINFO_FW_CFG_FILE, vmci, 4096,
+                             false /* page boundary, high memory */);
+
+    /* Patch address of vmcoreinfo fw_cfg blob into the ADDR fw_cfg
+     * blob so QEMU can read the info from there.  The address is
+     * expected to be < 4GB, but write 64 bits anyway.
+     * The address that is patched in is offset in order to implement
+     * the "OVMF SDT Header probe suppressor"
+     * see docs/specs/vmcoreinfo.txt for more details.
+     */
+    bios_linker_loader_write_pointer(linker,
+        VMCOREINFO_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t),
+        VMCOREINFO_FW_CFG_FILE, VMCOREINFO_OFFSET);
+
+    /* Patch address of vmcoreinfo into the AML so OSPM can retrieve
+     * and read it.  Note that while we provide storage for 64 bits, only
+     * the least-signficant 32 get patched into AML.
+     */
+    bios_linker_loader_add_pointer(linker,
+        ACPI_BUILD_TABLE_FILE, vcia_offset, sizeof(uint32_t),
+        VMCOREINFO_FW_CFG_FILE, 0);
+
+    build_header(linker, table_data,
+        (void *)(table_data->data + table_data->len - ssdt->buf->len),
+        "SSDT", ssdt->buf->len, 1, NULL, "VMCOREIN");
+    free_aml_allocator();
+}
+
+void vmcoreinfo_add_fw_cfg(VMCoreInfoState *vis, FWCfgState *s, GArray *vmci)
+{
+    /* Create a read-only fw_cfg file for vmcoreinfo allocation */
+    /* XXX: linker could learn to allocate without backing fw_cfg? */
+    fw_cfg_add_file(s, VMCOREINFO_FW_CFG_FILE, vmci->data,
+                    VMCOREINFO_FW_CFG_SIZE);
+    /* Create a read-write fw_cfg file for Address */
+    fw_cfg_add_file_callback(s, VMCOREINFO_ADDR_FW_CFG_FILE, NULL, NULL,
+                             vis->vmcoreinfo_addr_le,
+                             ARRAY_SIZE(vis->vmcoreinfo_addr_le), false);
+}
+
+bool vmcoreinfo_get(VMCoreInfoState *vis,
+                    uint64_t *paddr, uint32_t *size,
+                    Error **errp)
+{
+    uint32_t vmcoreinfo_addr;
+    uint32_t version;
+
+    assert(vis);
+    assert(paddr);
+    assert(size);
+
+    memcpy(&vmcoreinfo_addr, vis->vmcoreinfo_addr_le, sizeof(vmcoreinfo_addr));
+    vmcoreinfo_addr = le32_to_cpu(vmcoreinfo_addr);
+    if (!vmcoreinfo_addr) {
+        error_setg(errp, "BIOS has not yet written the address of %s",
+                   VMCOREINFO_DEVICE);
+        return false;
+    }
+
+    cpu_physical_memory_read(vmcoreinfo_addr, &version, sizeof(version));
+    if (version != 0) {
+        error_setg(errp, "Unknown %s memory version", VMCOREINFO_DEVICE);
+        return false;
+    }
+
+    cpu_physical_memory_read(vmcoreinfo_addr + 4, paddr, sizeof(*paddr));
+    *paddr = le64_to_cpu(*paddr);
+    cpu_physical_memory_read(vmcoreinfo_addr + 12, size, sizeof(*size));
+    *size = le32_to_cpu(*size);
+
+    return true;
+}
+
+static const VMStateDescription vmstate_vmcoreinfo = {
+    .name = "vmcoreinfo",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(vmcoreinfo_addr_le, VMCoreInfoState, sizeof(uint64_t)),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static void vmcoreinfo_handle_reset(void *opaque)
+{
+    VMCoreInfoState *vis = VMCOREINFO(opaque);
+
+    /* Clear the guest-allocated address when the VM resets */
+    memset(vis->vmcoreinfo_addr_le, 0, ARRAY_SIZE(vis->vmcoreinfo_addr_le));
+}
+
+static void vmcoreinfo_realize(DeviceState *dev, Error **errp)
+{
+    if (!bios_linker_loader_can_write_pointer()) {
+        error_setg(errp, "%s requires DMA write support in fw_cfg, "
+                   "which this machine type does not provide",
+                   VMCOREINFO_DEVICE);
+        return;
+    }
+
+    /* Given that this function is executing, there is at least one VMCOREINFO
+     * device. Check if there are several.
+     */
+    if (!find_vmcoreinfo_dev()) {
+        error_setg(errp, "at most one %s device is permitted",
+                   VMCOREINFO_DEVICE);
+        return;
+    }
+
+    qemu_register_reset(vmcoreinfo_handle_reset, dev);
+}
+
+static void vmcoreinfo_device_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->vmsd = &vmstate_vmcoreinfo;
+    dc->realize = vmcoreinfo_realize;
+    dc->hotpluggable = false;
+}
+
+static const TypeInfo vmcoreinfo_device_info = {
+    .name          = VMCOREINFO_DEVICE,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(VMCoreInfoState),
+    .class_init    = vmcoreinfo_device_class_init,
+};
+
+static void vmcoreinfo_register_types(void)
+{
+    type_register_static(&vmcoreinfo_device_info);
+}
+
+type_init(vmcoreinfo_register_types)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 6b7bade183..7ac529680e 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -43,6 +43,7 @@ 
 #include "sysemu/tpm.h"
 #include "hw/acpi/tpm.h"
 #include "hw/acpi/vmgenid.h"
+#include "hw/acpi/vmcoreinfo.h"
 #include "sysemu/tpm_backend.h"
 #include "hw/timer/mc146818rtc_regs.h"
 #include "sysemu/numa.h"
@@ -2631,6 +2632,7 @@  void acpi_build(AcpiBuildTables *tables, MachineState *machine)
     GArray *tables_blob = tables->table_data;
     AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL };
     Object *vmgenid_dev;
+    Object *vmcoreinfo_dev;
 
     acpi_get_pm_info(&pm);
     acpi_get_misc_info(&misc);
@@ -2680,6 +2682,12 @@  void acpi_build(AcpiBuildTables *tables, MachineState *machine)
         vmgenid_build_acpi(VMGENID(vmgenid_dev), tables_blob,
                            tables->vmgenid, tables->linker);
     }
+    vmcoreinfo_dev = find_vmcoreinfo_dev();
+    if (vmcoreinfo_dev) {
+        acpi_add_table(table_offsets, tables_blob);
+        vmcoreinfo_build_acpi(VMCOREINFO(vmcoreinfo_dev), tables_blob,
+                              tables->vmcoreinfo, tables->linker);
+    }
 
     if (misc.has_hpet) {
         acpi_add_table(table_offsets, tables_blob);
@@ -2856,6 +2864,7 @@  void acpi_setup(void)
     AcpiBuildTables tables;
     AcpiBuildState *build_state;
     Object *vmgenid_dev;
+    Object *vmcoreinfo_dev;
 
     if (!pcms->fw_cfg) {
         ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n");
@@ -2897,6 +2906,11 @@  void acpi_setup(void)
         vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg,
                            tables.vmgenid);
     }
+    vmcoreinfo_dev = find_vmcoreinfo_dev();
+    if (vmcoreinfo_dev) {
+        vmcoreinfo_add_fw_cfg(VMCOREINFO(vmcoreinfo_dev), pcms->fw_cfg,
+                              tables.vmcoreinfo);
+    }
 
     if (!pcmc->rsdp_in_ram) {
         /*
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index 93e995d318..320dd3680d 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -120,6 +120,7 @@  CONFIG_XIO3130=y
 CONFIG_IOH3420=y
 CONFIG_I82801B11=y
 CONFIG_ACPI=y
+CONFIG_ACPI_VMCOREINFO=y
 CONFIG_SMBIOS=y
 CONFIG_ASPEED_SOC=y
 CONFIG_GPIO_KEY=y
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index d2ab2f6655..df68628895 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -59,3 +59,4 @@  CONFIG_SMBIOS=y
 CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
 CONFIG_PXB=y
 CONFIG_ACPI_VMGENID=y
+CONFIG_ACPI_VMCOREINFO=y
diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak
index 9bde2f1c4b..e39ad5a680 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -59,3 +59,4 @@  CONFIG_SMBIOS=y
 CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
 CONFIG_PXB=y
 CONFIG_ACPI_VMGENID=y
+CONFIG_ACPI_VMCOREINFO=y
diff --git a/docs/specs/vmcoreinfo.txt b/docs/specs/vmcoreinfo.txt
new file mode 100644
index 0000000000..36d5a39ab1
--- /dev/null
+++ b/docs/specs/vmcoreinfo.txt
@@ -0,0 +1,138 @@ 
+VIRTUAL MACHINE COREINFO DEVICE
+===============================
+
+Copyright (C) 2017 Red Hat, Inc.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later.
+See the COPYING file in the top-level directory.
+
+===
+
+The VM coreinfo (vmcoreinfo) device is an emulated device which
+exposes a 4k memory range to the guest to store various informations
+useful to debug the guest OS.
+
+QEMU Implementation
+-------------------
+
+The vmcoreinfo device is put in its own ACPI descriptor table, in a
+Secondary System Description Table, or SSDT.
+
+The following is a dump of the contents from a running system:
+
+# iasl -p ./SSDT -d /sys/firmware/acpi/tables/SSDT
+/*
+ * Intel ACPI Component Architecture
+ * AML/ASL+ Disassembler version 20160831-64
+ * Copyright (c) 2000 - 2016 Intel Corporation
+ *
+ * Disassembling to symbolic ASL+ operators
+ *
+ * Disassembly of /sys/firmware/acpi/tables/SSDT, Mon Apr 24 15:59:53 2017
+ *
+ * Original Table Header:
+ *     Signature        "SSDT"
+ *     Length           0x00000086 (134)
+ *     Revision         0x01
+ *     Checksum         0x5C
+ *     OEM ID           "BOCHS "
+ *     OEM Table ID     "VMCOREIN"
+ *     OEM Revision     0x00000001 (1)
+ *     Compiler ID      "BXPC"
+ *     Compiler Version 0x00000001 (1)
+ */
+DefinitionBlock ("", "SSDT", 1, "BOCHS ", "VMCOREIN", 0x00000001)
+{
+    Name (VCIA, 0x3FFFF000)
+    Scope (\_SB)
+    {
+        Device (VMCI)
+        {
+            Name (_HID, "QEMUVMCI")  // _HID: Hardware ID
+            Method (_STA, 0, NotSerialized)  // _STA: Status
+            {
+                Local0 = 0x0F
+                If (VCIA == Zero)
+                {
+                    Local0 = Zero
+                }
+
+                Return (Local0)
+            }
+
+            Method (ADDR, 0, NotSerialized)
+            {
+                Local0 = Package (0x02) {}
+                Local0 [Zero] = (VCIA + 0x24)
+                Local0 [One] = Zero
+                Return (Local0)
+            }
+        }
+    }
+}
+
+
+Design Details:
+---------------
+
+QEMU must be able to read the contents of the device memory,
+specifically when starting a memory dump.  In order to do this, QEMU
+must know the address that has been allocated.
+
+The mechanism chosen for this memory sharing is writeable fw_cfg blobs.
+These are data object that are visible to both QEMU and guests, and are
+addressable as sequential files.
+
+More information about fw_cfg can be found in "docs/specs/fw_cfg.txt"
+
+Two fw_cfg blobs are used in this case:
+
+/etc/vmcoreinfo      - used to allocate memory range, read-only to the guest
+/etc/vmcoreinfo-addr - contains the address of the allocated range
+                     - writeable by the guest
+
+
+QEMU sends the following commands to the guest at startup:
+
+1. Allocate memory for vmcoreinfo fw_cfg blob.
+2. Write the address of vmcoreinfo into the SSDT (VCIA ACPI variable as
+   shown above in the iasl dump).  Note that this change is not propagated
+   back to QEMU.
+3. Write the address of vmcoreinfo back to QEMU's copy of vmcoreinfo-addr
+   via the fw_cfg DMA interface.
+
+After step 3, QEMU is able to read the contents of vmcoreinfo.
+
+The value of VCIA is persisted via the VMState mechanism.
+
+
+Storage Format:
+---------------
+
+The content is expected to use little-endian format.
+
+In order to implement an OVMF "SDT Header Probe Suppressor", the contents of
+the vmcoreinfo blob has 36 bytes of padding:
+
++-----------------------------------+
+| SSDT with OEM Table ID = VMCOREIN |
++-----------------------------------+
+| ...                               |       TOP OF PAGE
+| VCIA dword object ----------------|-----> +---------------------------+
+| ...                               |       | fw-allocated array for    |
+| _STA method referring to VCIA     |       | "etc/vmcoreinfo"          |
+| ...                               |       +---------------------------+
+| ADDR method referring to VCIA     |       |  0: OVMF SDT Header probe |
+| ...                               |       |     suppressor            |
++-----------------------------------+       | 36: uint32 version field  |
+                                            | 40: info contents         |
+                                            |     ....                  |
+                                            +---------------------------+
+                                            END OF PAGE
+
+Version 0 content:
+
+ uint64 paddr:
+  Physical address of the Linux vmcoreinfo ELF note.
+ uint32 size:
+  Size of the vmcoreinfo ELF note.
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 11c35bcb44..9623078f95 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -6,6 +6,7 @@  common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o
 common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
 common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
 common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
+common-obj-$(CONFIG_ACPI_VMCOREINFO) += vmcoreinfo.o
 common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
 
 common-obj-y += acpi_interface.o