diff mbox series

x86: temporarily remove all attempts to provide setup_data

Message ID 20230208180835.234638-1-Jason@zx2c4.com
State New
Headers show
Series x86: temporarily remove all attempts to provide setup_data | expand

Commit Message

Jason A. Donenfeld Feb. 8, 2023, 6:08 p.m. UTC
All attempts at providing setup_data have been made as an iteration on
whatever was there before, stretching back to the original
implementation used for DTBs that [mis]used the kernel image itself.
We've now had a dozen rounds of bugs and hacks, and the result is
turning into a pile of unmaintainable and increasingly brittle hacks.

Let's just rip out all the madness and start over. We can re-architect
this based on having a separate standalone setup_data file, which is how
it should have been done in the first place. This is a larger project
with a few things to coordinate, but we can't really begin thinking
about that while trying to play whack-a-mole with the current buggy
implementation.

So this commit removes the setup_data setting from x86_load_linux(),
while leaving intact the infrastructure we'll need in the future to try
again.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Dov Murik <dovmurik@linux.ibm.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Daniel P. Berrangé <berrange@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 hw/i386/microvm.c |  15 ++----
 hw/i386/x86.c     | 120 +++++-----------------------------------------
 2 files changed, 17 insertions(+), 118 deletions(-)

Comments

Michael S. Tsirkin Feb. 8, 2023, 6:13 p.m. UTC | #1
On Wed, Feb 08, 2023 at 03:08:35PM -0300, Jason A. Donenfeld wrote:
> All attempts at providing setup_data have been made as an iteration on
> whatever was there before, stretching back to the original
> implementation used for DTBs that [mis]used the kernel image itself.
> We've now had a dozen rounds of bugs and hacks, and the result is
> turning into a pile of unmaintainable and increasingly brittle hacks.
> 
> Let's just rip out all the madness and start over. We can re-architect
> this based on having a separate standalone setup_data file, which is how
> it should have been done in the first place. This is a larger project
> with a few things to coordinate, but we can't really begin thinking
> about that while trying to play whack-a-mole with the current buggy
> implementation.
> 
> So this commit removes the setup_data setting from x86_load_linux(),
> while leaving intact the infrastructure we'll need in the future to try
> again.
> 
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Dov Murik <dovmurik@linux.ibm.com>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: Daniel P. Berrangé <berrange@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Richard Henderson <richard.henderson@linaro.org>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> Cc: Nathan Chancellor <nathan@kernel.org>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: Eric Biggers <ebiggers@kernel.org>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>

I think I'll be happier if this is just a revert of
the relevant commits in reverse order to make life easier
for backporters.
Unless that's too much work as we made other changes around
this code?

Failing that list the affected commits so people at least
know what to revert?



> ---
>  hw/i386/microvm.c |  15 ++----
>  hw/i386/x86.c     | 120 +++++-----------------------------------------
>  2 files changed, 17 insertions(+), 118 deletions(-)
> 
> diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
> index 29f30dd6d3..170a331e3f 100644
> --- a/hw/i386/microvm.c
> +++ b/hw/i386/microvm.c
> @@ -378,8 +378,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
>      MicrovmMachineState *mms = MICROVM_MACHINE(machine);
>      BusState *bus;
>      BusChild *kid;
> -    char *cmdline, *existing_cmdline;
> -    size_t len;
> +    char *cmdline;
>  
>      /*
>       * Find MMIO transports with attached devices, and add them to the kernel
> @@ -388,8 +387,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
>       * Yes, this is a hack, but one that heavily improves the UX without
>       * introducing any significant issues.
>       */
> -    existing_cmdline = fw_cfg_read_bytes_ptr(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA);
> -    cmdline = g_strdup(existing_cmdline);
> +    cmdline = g_strdup(machine->kernel_cmdline);
>      bus = sysbus_get_default();
>      QTAILQ_FOREACH(kid, &bus->children, sibling) {
>          DeviceState *dev = kid->child;
> @@ -413,12 +411,9 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
>          }
>      }
>  
> -    len = strlen(cmdline);
> -    if (len > VIRTIO_CMDLINE_TOTAL_MAX_LEN + strlen(existing_cmdline)) {
> -        fprintf(stderr, "qemu: virtio mmio cmdline too large, skipping\n");
> -    } else {
> -        memcpy(existing_cmdline, cmdline, len + 1);
> -    }
> +    fw_cfg_modify_i32(x86ms->fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(cmdline) + 1);
> +    fw_cfg_modify_string(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA, cmdline);
> +
>      g_free(cmdline);
>  }
>  
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index eaff4227bd..6cfdca9acd 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -50,7 +50,6 @@
>  #include "hw/intc/i8259.h"
>  #include "hw/rtc/mc146818rtc.h"
>  #include "target/i386/sev.h"
> -#include "hw/i386/microvm.h"
>  
>  #include "hw/acpi/cpu_hotplug.h"
>  #include "hw/irq.h"
> @@ -770,30 +769,6 @@ static bool load_elfboot(const char *kernel_filename,
>      return true;
>  }
>  
> -typedef struct SetupDataFixup {
> -    void *pos;
> -    hwaddr orig_val, new_val;
> -    uint32_t addr;
> -} SetupDataFixup;
> -
> -static void fixup_setup_data(void *opaque)
> -{
> -    SetupDataFixup *fixup = opaque;
> -    stq_p(fixup->pos, fixup->new_val);
> -}
> -
> -static void reset_setup_data(void *opaque)
> -{
> -    SetupDataFixup *fixup = opaque;
> -    stq_p(fixup->pos, fixup->orig_val);
> -}
> -
> -static void reset_rng_seed(void *opaque)
> -{
> -    SetupData *setup_data = opaque;
> -    qemu_guest_getrandom_nofail(setup_data->data, le32_to_cpu(setup_data->len));
> -}
> -
>  void x86_load_linux(X86MachineState *x86ms,
>                      FWCfgState *fw_cfg,
>                      int acpi_data_size,
> @@ -803,29 +778,20 @@ void x86_load_linux(X86MachineState *x86ms,
>      bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled;
>      uint16_t protocol;
>      int setup_size, kernel_size, cmdline_size;
> -    int dtb_size, setup_data_offset;
>      uint32_t initrd_max;
>      uint8_t header[8192], *setup, *kernel;
> -    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0;
> +    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
>      FILE *f;
>      char *vmode;
>      MachineState *machine = MACHINE(x86ms);
> -    SetupData *setup_data;
>      const char *kernel_filename = machine->kernel_filename;
>      const char *initrd_filename = machine->initrd_filename;
> -    const char *dtb_filename = machine->dtb;
> -    char *kernel_cmdline;
> +    const char *kernel_cmdline = machine->kernel_cmdline;
>      SevKernelLoaderContext sev_load_ctx = {};
>      enum { RNG_SEED_LENGTH = 32 };
>  
> -    /*
> -     * Add the NUL terminator, some padding for the microvm cmdline fiddling
> -     * hack, and then align to 16 bytes as a paranoia measure
> -     */
> -    cmdline_size = (strlen(machine->kernel_cmdline) + 1 +
> -                    VIRTIO_CMDLINE_TOTAL_MAX_LEN + 16) & ~15;
> -    /* Make a copy, since we might append arbitrary bytes to it later. */
> -    kernel_cmdline = g_strndup(machine->kernel_cmdline, cmdline_size);
> +    /* Align to 16 bytes as a paranoia measure */
> +    cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
>  
>      /* load the kernel header */
>      f = fopen(kernel_filename, "rb");
> @@ -966,6 +932,12 @@ void x86_load_linux(X86MachineState *x86ms,
>          initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1;
>      }
>  
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1);
> +    fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
> +    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
> +    sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1;
> +
>      if (protocol >= 0x202) {
>          stl_p(header + 0x228, cmdline_addr);
>      } else {
> @@ -1078,81 +1050,13 @@ void x86_load_linux(X86MachineState *x86ms,
>      }
>      fclose(f);
>  
> -    /* append dtb to kernel */
> -    if (dtb_filename) {
> -        if (protocol < 0x209) {
> -            fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n");
> -            exit(1);
> -        }
> -
> -        dtb_size = get_image_size(dtb_filename);
> -        if (dtb_size <= 0) {
> -            fprintf(stderr, "qemu: error reading dtb %s: %s\n",
> -                    dtb_filename, strerror(errno));
> -            exit(1);
> -        }
> -
> -        setup_data_offset = cmdline_size;
> -        cmdline_size += sizeof(SetupData) + dtb_size;
> -        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
> -        setup_data = (void *)kernel_cmdline + setup_data_offset;
> -        setup_data->next = cpu_to_le64(first_setup_data);
> -        first_setup_data = cmdline_addr + setup_data_offset;
> -        setup_data->type = cpu_to_le32(SETUP_DTB);
> -        setup_data->len = cpu_to_le32(dtb_size);
> -        load_image_size(dtb_filename, setup_data->data, dtb_size);
> -    }
> -
> -    if (!legacy_no_rng_seed && protocol >= 0x209) {
> -        setup_data_offset = cmdline_size;
> -        cmdline_size += sizeof(SetupData) + RNG_SEED_LENGTH;
> -        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
> -        setup_data = (void *)kernel_cmdline + setup_data_offset;
> -        setup_data->next = cpu_to_le64(first_setup_data);
> -        first_setup_data = cmdline_addr + setup_data_offset;
> -        setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
> -        setup_data->len = cpu_to_le32(RNG_SEED_LENGTH);
> -        qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
> -        qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data);
> -        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL,
> -                                  setup_data, kernel, kernel_size, true);
> -    } else {
> -        fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
> -    }
> -
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, cmdline_size);
> -    fw_cfg_add_bytes(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline, cmdline_size);
> -    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
> -    sev_load_ctx.cmdline_size = cmdline_size;
> -
> +    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
>      sev_load_ctx.kernel_data = (char *)kernel;
>      sev_load_ctx.kernel_size = kernel_size;
>  
> -    /*
> -     * If we're starting an encrypted VM, it will be OVMF based, which uses the
> -     * efi stub for booting and doesn't require any values to be placed in the
> -     * kernel header.  We therefore don't update the header so the hash of the
> -     * kernel on the other side of the fw_cfg interface matches the hash of the
> -     * file the user passed in.
> -     */
> -    if (!sev_enabled() && first_setup_data) {
> -        SetupDataFixup *fixup = g_malloc(sizeof(*fixup));
> -
> -        memcpy(setup, header, MIN(sizeof(header), setup_size));
> -        /* Offset 0x250 is a pointer to the first setup_data link. */
> -        fixup->pos = setup + 0x250;
> -        fixup->orig_val = ldq_p(fixup->pos);
> -        fixup->new_val = first_setup_data;
> -        fixup->addr = cpu_to_le32(real_addr);
> -        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL,
> -                                  fixup, &fixup->addr, sizeof(fixup->addr), true);
> -        qemu_register_reset(reset_setup_data, fixup);
> -    } else {
> -        fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
> -    }
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
>      sev_load_ctx.setup_data = (char *)setup;
> -- 
> 2.39.1
Nathan Chancellor Feb. 8, 2023, 6:14 p.m. UTC | #2
On Wed, Feb 08, 2023 at 03:08:35PM -0300, Jason A. Donenfeld wrote:
> All attempts at providing setup_data have been made as an iteration on
> whatever was there before, stretching back to the original
> implementation used for DTBs that [mis]used the kernel image itself.
> We've now had a dozen rounds of bugs and hacks, and the result is
> turning into a pile of unmaintainable and increasingly brittle hacks.
> 
> Let's just rip out all the madness and start over. We can re-architect
> this based on having a separate standalone setup_data file, which is how
> it should have been done in the first place. This is a larger project
> with a few things to coordinate, but we can't really begin thinking
> about that while trying to play whack-a-mole with the current buggy
> implementation.
> 
> So this commit removes the setup_data setting from x86_load_linux(),
> while leaving intact the infrastructure we'll need in the future to try
> again.
> 
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Dov Murik <dovmurik@linux.ibm.com>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: Daniel P. Berrangé <berrange@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Richard Henderson <richard.henderson@linaro.org>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> Cc: Nathan Chancellor <nathan@kernel.org>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: Eric Biggers <ebiggers@kernel.org>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>

Thanks for the quick patch! Both of my use cases appear fixed.

Tested-by: Nathan Chancellor <nathan@kernel.org>

> ---
>  hw/i386/microvm.c |  15 ++----
>  hw/i386/x86.c     | 120 +++++-----------------------------------------
>  2 files changed, 17 insertions(+), 118 deletions(-)
> 
> diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
> index 29f30dd6d3..170a331e3f 100644
> --- a/hw/i386/microvm.c
> +++ b/hw/i386/microvm.c
> @@ -378,8 +378,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
>      MicrovmMachineState *mms = MICROVM_MACHINE(machine);
>      BusState *bus;
>      BusChild *kid;
> -    char *cmdline, *existing_cmdline;
> -    size_t len;
> +    char *cmdline;
>  
>      /*
>       * Find MMIO transports with attached devices, and add them to the kernel
> @@ -388,8 +387,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
>       * Yes, this is a hack, but one that heavily improves the UX without
>       * introducing any significant issues.
>       */
> -    existing_cmdline = fw_cfg_read_bytes_ptr(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA);
> -    cmdline = g_strdup(existing_cmdline);
> +    cmdline = g_strdup(machine->kernel_cmdline);
>      bus = sysbus_get_default();
>      QTAILQ_FOREACH(kid, &bus->children, sibling) {
>          DeviceState *dev = kid->child;
> @@ -413,12 +411,9 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
>          }
>      }
>  
> -    len = strlen(cmdline);
> -    if (len > VIRTIO_CMDLINE_TOTAL_MAX_LEN + strlen(existing_cmdline)) {
> -        fprintf(stderr, "qemu: virtio mmio cmdline too large, skipping\n");
> -    } else {
> -        memcpy(existing_cmdline, cmdline, len + 1);
> -    }
> +    fw_cfg_modify_i32(x86ms->fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(cmdline) + 1);
> +    fw_cfg_modify_string(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA, cmdline);
> +
>      g_free(cmdline);
>  }
>  
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index eaff4227bd..6cfdca9acd 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -50,7 +50,6 @@
>  #include "hw/intc/i8259.h"
>  #include "hw/rtc/mc146818rtc.h"
>  #include "target/i386/sev.h"
> -#include "hw/i386/microvm.h"
>  
>  #include "hw/acpi/cpu_hotplug.h"
>  #include "hw/irq.h"
> @@ -770,30 +769,6 @@ static bool load_elfboot(const char *kernel_filename,
>      return true;
>  }
>  
> -typedef struct SetupDataFixup {
> -    void *pos;
> -    hwaddr orig_val, new_val;
> -    uint32_t addr;
> -} SetupDataFixup;
> -
> -static void fixup_setup_data(void *opaque)
> -{
> -    SetupDataFixup *fixup = opaque;
> -    stq_p(fixup->pos, fixup->new_val);
> -}
> -
> -static void reset_setup_data(void *opaque)
> -{
> -    SetupDataFixup *fixup = opaque;
> -    stq_p(fixup->pos, fixup->orig_val);
> -}
> -
> -static void reset_rng_seed(void *opaque)
> -{
> -    SetupData *setup_data = opaque;
> -    qemu_guest_getrandom_nofail(setup_data->data, le32_to_cpu(setup_data->len));
> -}
> -
>  void x86_load_linux(X86MachineState *x86ms,
>                      FWCfgState *fw_cfg,
>                      int acpi_data_size,
> @@ -803,29 +778,20 @@ void x86_load_linux(X86MachineState *x86ms,
>      bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled;
>      uint16_t protocol;
>      int setup_size, kernel_size, cmdline_size;
> -    int dtb_size, setup_data_offset;
>      uint32_t initrd_max;
>      uint8_t header[8192], *setup, *kernel;
> -    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0;
> +    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
>      FILE *f;
>      char *vmode;
>      MachineState *machine = MACHINE(x86ms);
> -    SetupData *setup_data;
>      const char *kernel_filename = machine->kernel_filename;
>      const char *initrd_filename = machine->initrd_filename;
> -    const char *dtb_filename = machine->dtb;
> -    char *kernel_cmdline;
> +    const char *kernel_cmdline = machine->kernel_cmdline;
>      SevKernelLoaderContext sev_load_ctx = {};
>      enum { RNG_SEED_LENGTH = 32 };
>  
> -    /*
> -     * Add the NUL terminator, some padding for the microvm cmdline fiddling
> -     * hack, and then align to 16 bytes as a paranoia measure
> -     */
> -    cmdline_size = (strlen(machine->kernel_cmdline) + 1 +
> -                    VIRTIO_CMDLINE_TOTAL_MAX_LEN + 16) & ~15;
> -    /* Make a copy, since we might append arbitrary bytes to it later. */
> -    kernel_cmdline = g_strndup(machine->kernel_cmdline, cmdline_size);
> +    /* Align to 16 bytes as a paranoia measure */
> +    cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
>  
>      /* load the kernel header */
>      f = fopen(kernel_filename, "rb");
> @@ -966,6 +932,12 @@ void x86_load_linux(X86MachineState *x86ms,
>          initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1;
>      }
>  
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1);
> +    fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
> +    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
> +    sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1;
> +
>      if (protocol >= 0x202) {
>          stl_p(header + 0x228, cmdline_addr);
>      } else {
> @@ -1078,81 +1050,13 @@ void x86_load_linux(X86MachineState *x86ms,
>      }
>      fclose(f);
>  
> -    /* append dtb to kernel */
> -    if (dtb_filename) {
> -        if (protocol < 0x209) {
> -            fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n");
> -            exit(1);
> -        }
> -
> -        dtb_size = get_image_size(dtb_filename);
> -        if (dtb_size <= 0) {
> -            fprintf(stderr, "qemu: error reading dtb %s: %s\n",
> -                    dtb_filename, strerror(errno));
> -            exit(1);
> -        }
> -
> -        setup_data_offset = cmdline_size;
> -        cmdline_size += sizeof(SetupData) + dtb_size;
> -        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
> -        setup_data = (void *)kernel_cmdline + setup_data_offset;
> -        setup_data->next = cpu_to_le64(first_setup_data);
> -        first_setup_data = cmdline_addr + setup_data_offset;
> -        setup_data->type = cpu_to_le32(SETUP_DTB);
> -        setup_data->len = cpu_to_le32(dtb_size);
> -        load_image_size(dtb_filename, setup_data->data, dtb_size);
> -    }
> -
> -    if (!legacy_no_rng_seed && protocol >= 0x209) {
> -        setup_data_offset = cmdline_size;
> -        cmdline_size += sizeof(SetupData) + RNG_SEED_LENGTH;
> -        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
> -        setup_data = (void *)kernel_cmdline + setup_data_offset;
> -        setup_data->next = cpu_to_le64(first_setup_data);
> -        first_setup_data = cmdline_addr + setup_data_offset;
> -        setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
> -        setup_data->len = cpu_to_le32(RNG_SEED_LENGTH);
> -        qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
> -        qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data);
> -        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL,
> -                                  setup_data, kernel, kernel_size, true);
> -    } else {
> -        fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
> -    }
> -
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, cmdline_size);
> -    fw_cfg_add_bytes(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline, cmdline_size);
> -    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
> -    sev_load_ctx.cmdline_size = cmdline_size;
> -
> +    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
>      sev_load_ctx.kernel_data = (char *)kernel;
>      sev_load_ctx.kernel_size = kernel_size;
>  
> -    /*
> -     * If we're starting an encrypted VM, it will be OVMF based, which uses the
> -     * efi stub for booting and doesn't require any values to be placed in the
> -     * kernel header.  We therefore don't update the header so the hash of the
> -     * kernel on the other side of the fw_cfg interface matches the hash of the
> -     * file the user passed in.
> -     */
> -    if (!sev_enabled() && first_setup_data) {
> -        SetupDataFixup *fixup = g_malloc(sizeof(*fixup));
> -
> -        memcpy(setup, header, MIN(sizeof(header), setup_size));
> -        /* Offset 0x250 is a pointer to the first setup_data link. */
> -        fixup->pos = setup + 0x250;
> -        fixup->orig_val = ldq_p(fixup->pos);
> -        fixup->new_val = first_setup_data;
> -        fixup->addr = cpu_to_le32(real_addr);
> -        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL,
> -                                  fixup, &fixup->addr, sizeof(fixup->addr), true);
> -        qemu_register_reset(reset_setup_data, fixup);
> -    } else {
> -        fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
> -    }
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
>      sev_load_ctx.setup_data = (char *)setup;
> -- 
> 2.39.1
>
Jason A. Donenfeld Feb. 8, 2023, 6:14 p.m. UTC | #3
On Wed, Feb 8, 2023 at 3:13 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Feb 08, 2023 at 03:08:35PM -0300, Jason A. Donenfeld wrote:
> > All attempts at providing setup_data have been made as an iteration on
> > whatever was there before, stretching back to the original
> > implementation used for DTBs that [mis]used the kernel image itself.
> > We've now had a dozen rounds of bugs and hacks, and the result is
> > turning into a pile of unmaintainable and increasingly brittle hacks.
> >
> > Let's just rip out all the madness and start over. We can re-architect
> > this based on having a separate standalone setup_data file, which is how
> > it should have been done in the first place. This is a larger project
> > with a few things to coordinate, but we can't really begin thinking
> > about that while trying to play whack-a-mole with the current buggy
> > implementation.
> >
> > So this commit removes the setup_data setting from x86_load_linux(),
> > while leaving intact the infrastructure we'll need in the future to try
> > again.
> >
> > Cc: Michael S. Tsirkin <mst@redhat.com>
> > Cc: Dov Murik <dovmurik@linux.ibm.com>
> > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > Cc: Daniel P. Berrangé <berrange@redhat.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Richard Henderson <richard.henderson@linaro.org>
> > Cc: H. Peter Anvin <hpa@zytor.com>
> > Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> > Cc: Nathan Chancellor <nathan@kernel.org>
> > Cc: Borislav Petkov <bp@alien8.de>
> > Cc: Eric Biggers <ebiggers@kernel.org>
> > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
>
> I think I'll be happier if this is just a revert of
> the relevant commits in reverse order to make life easier
> for backporters.
> Unless that's too much work as we made other changes around
> this code?

I think that's going to be messy. And it won't handle the dtb stuff
either straightforwardly.
Michael S. Tsirkin Feb. 8, 2023, 6:18 p.m. UTC | #4
On Wed, Feb 08, 2023 at 03:14:38PM -0300, Jason A. Donenfeld wrote:
> On Wed, Feb 8, 2023 at 3:13 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Wed, Feb 08, 2023 at 03:08:35PM -0300, Jason A. Donenfeld wrote:
> > > All attempts at providing setup_data have been made as an iteration on
> > > whatever was there before, stretching back to the original
> > > implementation used for DTBs that [mis]used the kernel image itself.
> > > We've now had a dozen rounds of bugs and hacks, and the result is
> > > turning into a pile of unmaintainable and increasingly brittle hacks.
> > >
> > > Let's just rip out all the madness and start over. We can re-architect
> > > this based on having a separate standalone setup_data file, which is how
> > > it should have been done in the first place. This is a larger project
> > > with a few things to coordinate, but we can't really begin thinking
> > > about that while trying to play whack-a-mole with the current buggy
> > > implementation.
> > >
> > > So this commit removes the setup_data setting from x86_load_linux(),
> > > while leaving intact the infrastructure we'll need in the future to try
> > > again.
> > >
> > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > Cc: Dov Murik <dovmurik@linux.ibm.com>
> > > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > > Cc: Daniel P. Berrangé <berrange@redhat.com>
> > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > Cc: Richard Henderson <richard.henderson@linaro.org>
> > > Cc: H. Peter Anvin <hpa@zytor.com>
> > > Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> > > Cc: Nathan Chancellor <nathan@kernel.org>
> > > Cc: Borislav Petkov <bp@alien8.de>
> > > Cc: Eric Biggers <ebiggers@kernel.org>
> > > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> >
> > I think I'll be happier if this is just a revert of
> > the relevant commits in reverse order to make life easier
> > for backporters.
> > Unless that's too much work as we made other changes around
> > this code?
> 
> I think that's going to be messy. And it won't handle the dtb stuff
> either straightforwardly.

List of Fixes tags so people can at least figure out whether they
have a version that needs this fix then?
Jason A. Donenfeld Feb. 8, 2023, 6:26 p.m. UTC | #5
On Wed, Feb 08, 2023 at 01:18:37PM -0500, Michael S. Tsirkin wrote:
> On Wed, Feb 08, 2023 at 03:14:38PM -0300, Jason A. Donenfeld wrote:
> > On Wed, Feb 8, 2023 at 3:13 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Wed, Feb 08, 2023 at 03:08:35PM -0300, Jason A. Donenfeld wrote:
> > > > All attempts at providing setup_data have been made as an iteration on
> > > > whatever was there before, stretching back to the original
> > > > implementation used for DTBs that [mis]used the kernel image itself.
> > > > We've now had a dozen rounds of bugs and hacks, and the result is
> > > > turning into a pile of unmaintainable and increasingly brittle hacks.
> > > >
> > > > Let's just rip out all the madness and start over. We can re-architect
> > > > this based on having a separate standalone setup_data file, which is how
> > > > it should have been done in the first place. This is a larger project
> > > > with a few things to coordinate, but we can't really begin thinking
> > > > about that while trying to play whack-a-mole with the current buggy
> > > > implementation.
> > > >
> > > > So this commit removes the setup_data setting from x86_load_linux(),
> > > > while leaving intact the infrastructure we'll need in the future to try
> > > > again.
> > > >
> > > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > > Cc: Dov Murik <dovmurik@linux.ibm.com>
> > > > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > > > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > > > Cc: Daniel P. Berrangé <berrange@redhat.com>
> > > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > > Cc: Richard Henderson <richard.henderson@linaro.org>
> > > > Cc: H. Peter Anvin <hpa@zytor.com>
> > > > Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> > > > Cc: Nathan Chancellor <nathan@kernel.org>
> > > > Cc: Borislav Petkov <bp@alien8.de>
> > > > Cc: Eric Biggers <ebiggers@kernel.org>
> > > > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> > >
> > > I think I'll be happier if this is just a revert of
> > > the relevant commits in reverse order to make life easier
> > > for backporters.
> > > Unless that's too much work as we made other changes around
> > > this code?
> > 
> > I think that's going to be messy. And it won't handle the dtb stuff
> > either straightforwardly.
> 
> List of Fixes tags so people can at least figure out whether they
> have a version that needs this fix then?

7.2 is when the functionality started causing problems for most people.
But the buggy code goes back to 3cbeb524 in 2016.

Jason
Daniel P. Berrangé Feb. 8, 2023, 6:26 p.m. UTC | #6
On Wed, Feb 08, 2023 at 03:08:35PM -0300, Jason A. Donenfeld wrote:
> All attempts at providing setup_data have been made as an iteration on
> whatever was there before, stretching back to the original
> implementation used for DTBs that [mis]used the kernel image itself.
> We've now had a dozen rounds of bugs and hacks, and the result is
> turning into a pile of unmaintainable and increasingly brittle hacks.
> 
> Let's just rip out all the madness and start over. We can re-architect
> this based on having a separate standalone setup_data file, which is how
> it should have been done in the first place. This is a larger project
> with a few things to coordinate, but we can't really begin thinking
> about that while trying to play whack-a-mole with the current buggy
> implementation.
> 
> So this commit removes the setup_data setting from x86_load_linux(),
> while leaving intact the infrastructure we'll need in the future to try
> again.

Technically this changes the ABI of the 7.2.0 machine type version
which we would normally avoid.

In practice I think we can probably get away with doing it.

The number of released guest OS that consume the RNG seed is negligible
at this point in time. Any that do consume shouldn't mind much if it
just disappears on next boot, since it was always just an opt-in, and
the kernel will happily still boot without it.

So I'm fine if we just put a note in the commit message acknowledging
that this is an ABI incompatibility for the machine type, but explaining
why it is none the less OK todo in this exceptional circumstance.

> 
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Dov Murik <dovmurik@linux.ibm.com>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: Daniel P. Berrangé <berrange@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Richard Henderson <richard.henderson@linaro.org>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> Cc: Nathan Chancellor <nathan@kernel.org>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: Eric Biggers <ebiggers@kernel.org>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---
>  hw/i386/microvm.c |  15 ++----
>  hw/i386/x86.c     | 120 +++++-----------------------------------------
>  2 files changed, 17 insertions(+), 118 deletions(-)

Presumably it should be purging all traces of the 'legacy_no_rng_seed'
at the same time, with the view that any future implementation would be
tied to only whatever machine type is current at the time it gets
merged (could be 8.0.0 or 8.1.0 machine type, depending on how quickly
the new design gets settled).

$ git grep legacy_no_rng_seed
hw/i386/pc.c:                   pcmc->pvh_enabled, pcmc->legacy_no_rng_seed);
hw/i386/pc.c:                       pcmc->pvh_enabled, pcmc->legacy_no_rng_seed);
hw/i386/pc_piix.c:    pcmc->legacy_no_rng_seed = true;
hw/i386/pc_q35.c:    pcmc->legacy_no_rng_seed = true;
hw/i386/x86.c:                    bool legacy_no_rng_seed)
hw/i386/x86.c:    if (!legacy_no_rng_seed) {
include/hw/i386/pc.h:    bool legacy_no_rng_seed;
include/hw/i386/x86.h:                    bool legacy_no_rng_seed);


With regards,
Daniel
Daniel P. Berrangé Feb. 8, 2023, 6:31 p.m. UTC | #7
On Wed, Feb 08, 2023 at 07:26:05PM +0100, Jason A. Donenfeld wrote:
> On Wed, Feb 08, 2023 at 01:18:37PM -0500, Michael S. Tsirkin wrote:
> > On Wed, Feb 08, 2023 at 03:14:38PM -0300, Jason A. Donenfeld wrote:
> > > On Wed, Feb 8, 2023 at 3:13 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Wed, Feb 08, 2023 at 03:08:35PM -0300, Jason A. Donenfeld wrote:
> > > > > All attempts at providing setup_data have been made as an iteration on
> > > > > whatever was there before, stretching back to the original
> > > > > implementation used for DTBs that [mis]used the kernel image itself.
> > > > > We've now had a dozen rounds of bugs and hacks, and the result is
> > > > > turning into a pile of unmaintainable and increasingly brittle hacks.
> > > > >
> > > > > Let's just rip out all the madness and start over. We can re-architect
> > > > > this based on having a separate standalone setup_data file, which is how
> > > > > it should have been done in the first place. This is a larger project
> > > > > with a few things to coordinate, but we can't really begin thinking
> > > > > about that while trying to play whack-a-mole with the current buggy
> > > > > implementation.
> > > > >
> > > > > So this commit removes the setup_data setting from x86_load_linux(),
> > > > > while leaving intact the infrastructure we'll need in the future to try
> > > > > again.
> > > > >
> > > > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > > > Cc: Dov Murik <dovmurik@linux.ibm.com>
> > > > > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > > > > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > > > > Cc: Daniel P. Berrangé <berrange@redhat.com>
> > > > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > > > Cc: Richard Henderson <richard.henderson@linaro.org>
> > > > > Cc: H. Peter Anvin <hpa@zytor.com>
> > > > > Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> > > > > Cc: Nathan Chancellor <nathan@kernel.org>
> > > > > Cc: Borislav Petkov <bp@alien8.de>
> > > > > Cc: Eric Biggers <ebiggers@kernel.org>
> > > > > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> > > >
> > > > I think I'll be happier if this is just a revert of
> > > > the relevant commits in reverse order to make life easier
> > > > for backporters.
> > > > Unless that's too much work as we made other changes around
> > > > this code?
> > > 
> > > I think that's going to be messy. And it won't handle the dtb stuff
> > > either straightforwardly.
> > 
> > List of Fixes tags so people can at least figure out whether they
> > have a version that needs this fix then?
> 
> 7.2 is when the functionality started causing problems for most people.
> But the buggy code goes back to 3cbeb524 in 2016.

We can't rip out the full setup_data support back to that point. That
is deleting significant features that would break -dtb IIUC. For that
we would need to have a deprecation period to announce the incompatibility.

I was thinking this would only revert the RNG seed pieces which have
negligible user impact.


With regards,
Daniel
Jason A. Donenfeld Feb. 8, 2023, 6:34 p.m. UTC | #8
On Wed, Feb 08, 2023 at 06:31:20PM +0000, Daniel P. Berrangé wrote:
> On Wed, Feb 08, 2023 at 07:26:05PM +0100, Jason A. Donenfeld wrote:
> > On Wed, Feb 08, 2023 at 01:18:37PM -0500, Michael S. Tsirkin wrote:
> > > On Wed, Feb 08, 2023 at 03:14:38PM -0300, Jason A. Donenfeld wrote:
> > > > On Wed, Feb 8, 2023 at 3:13 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > >
> > > > > On Wed, Feb 08, 2023 at 03:08:35PM -0300, Jason A. Donenfeld wrote:
> > > > > > All attempts at providing setup_data have been made as an iteration on
> > > > > > whatever was there before, stretching back to the original
> > > > > > implementation used for DTBs that [mis]used the kernel image itself.
> > > > > > We've now had a dozen rounds of bugs and hacks, and the result is
> > > > > > turning into a pile of unmaintainable and increasingly brittle hacks.
> > > > > >
> > > > > > Let's just rip out all the madness and start over. We can re-architect
> > > > > > this based on having a separate standalone setup_data file, which is how
> > > > > > it should have been done in the first place. This is a larger project
> > > > > > with a few things to coordinate, but we can't really begin thinking
> > > > > > about that while trying to play whack-a-mole with the current buggy
> > > > > > implementation.
> > > > > >
> > > > > > So this commit removes the setup_data setting from x86_load_linux(),
> > > > > > while leaving intact the infrastructure we'll need in the future to try
> > > > > > again.
> > > > > >
> > > > > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > > > > Cc: Dov Murik <dovmurik@linux.ibm.com>
> > > > > > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > > > > > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > > > > > Cc: Daniel P. Berrangé <berrange@redhat.com>
> > > > > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > > > > Cc: Richard Henderson <richard.henderson@linaro.org>
> > > > > > Cc: H. Peter Anvin <hpa@zytor.com>
> > > > > > Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> > > > > > Cc: Nathan Chancellor <nathan@kernel.org>
> > > > > > Cc: Borislav Petkov <bp@alien8.de>
> > > > > > Cc: Eric Biggers <ebiggers@kernel.org>
> > > > > > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> > > > >
> > > > > I think I'll be happier if this is just a revert of
> > > > > the relevant commits in reverse order to make life easier
> > > > > for backporters.
> > > > > Unless that's too much work as we made other changes around
> > > > > this code?
> > > > 
> > > > I think that's going to be messy. And it won't handle the dtb stuff
> > > > either straightforwardly.
> > > 
> > > List of Fixes tags so people can at least figure out whether they
> > > have a version that needs this fix then?
> > 
> > 7.2 is when the functionality started causing problems for most people.
> > But the buggy code goes back to 3cbeb524 in 2016.
> 
> We can't rip out the full setup_data support back to that point. That
> is deleting significant features that would break -dtb IIUC. For that
> we would need to have a deprecation period to announce the incompatibility.
> 
> I was thinking this would only revert the RNG seed pieces which have
> negligible user impact.

I'm pretty sure -dtb is used by nobody...

Jason
Daniel P. Berrangé Feb. 8, 2023, 6:58 p.m. UTC | #9
On Wed, Feb 08, 2023 at 07:34:30PM +0100, Jason A. Donenfeld wrote:
> On Wed, Feb 08, 2023 at 06:31:20PM +0000, Daniel P. Berrangé wrote:
> > On Wed, Feb 08, 2023 at 07:26:05PM +0100, Jason A. Donenfeld wrote:
> > > On Wed, Feb 08, 2023 at 01:18:37PM -0500, Michael S. Tsirkin wrote:
> > > > On Wed, Feb 08, 2023 at 03:14:38PM -0300, Jason A. Donenfeld wrote:
> > > > > On Wed, Feb 8, 2023 at 3:13 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > > > >
> > > > > > On Wed, Feb 08, 2023 at 03:08:35PM -0300, Jason A. Donenfeld wrote:
> > > > > > > All attempts at providing setup_data have been made as an iteration on
> > > > > > > whatever was there before, stretching back to the original
> > > > > > > implementation used for DTBs that [mis]used the kernel image itself.
> > > > > > > We've now had a dozen rounds of bugs and hacks, and the result is
> > > > > > > turning into a pile of unmaintainable and increasingly brittle hacks.
> > > > > > >
> > > > > > > Let's just rip out all the madness and start over. We can re-architect
> > > > > > > this based on having a separate standalone setup_data file, which is how
> > > > > > > it should have been done in the first place. This is a larger project
> > > > > > > with a few things to coordinate, but we can't really begin thinking
> > > > > > > about that while trying to play whack-a-mole with the current buggy
> > > > > > > implementation.
> > > > > > >
> > > > > > > So this commit removes the setup_data setting from x86_load_linux(),
> > > > > > > while leaving intact the infrastructure we'll need in the future to try
> > > > > > > again.
> > > > > > >
> > > > > > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > Cc: Dov Murik <dovmurik@linux.ibm.com>
> > > > > > > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > > > > > > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > > > > > > Cc: Daniel P. Berrangé <berrange@redhat.com>
> > > > > > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > > > > > Cc: Richard Henderson <richard.henderson@linaro.org>
> > > > > > > Cc: H. Peter Anvin <hpa@zytor.com>
> > > > > > > Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> > > > > > > Cc: Nathan Chancellor <nathan@kernel.org>
> > > > > > > Cc: Borislav Petkov <bp@alien8.de>
> > > > > > > Cc: Eric Biggers <ebiggers@kernel.org>
> > > > > > > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> > > > > >
> > > > > > I think I'll be happier if this is just a revert of
> > > > > > the relevant commits in reverse order to make life easier
> > > > > > for backporters.
> > > > > > Unless that's too much work as we made other changes around
> > > > > > this code?
> > > > > 
> > > > > I think that's going to be messy. And it won't handle the dtb stuff
> > > > > either straightforwardly.
> > > > 
> > > > List of Fixes tags so people can at least figure out whether they
> > > > have a version that needs this fix then?
> > > 
> > > 7.2 is when the functionality started causing problems for most people.
> > > But the buggy code goes back to 3cbeb524 in 2016.
> > 
> > We can't rip out the full setup_data support back to that point. That
> > is deleting significant features that would break -dtb IIUC. For that
> > we would need to have a deprecation period to announce the incompatibility.
> > 
> > I was thinking this would only revert the RNG seed pieces which have
> > negligible user impact.
> 
> I'm pretty sure -dtb is used by nobody...

Unless it can be demonstrated it is so broken it is technically
impossible to successfully use in any way, then QEMU policy is to
assume that there are users.

The QEMU deprecation policy exists to handle the scenario where we
want to remove a feature and need to alert potential users ahead
of time, so they have an opportunity to object to the breakage.
This is described here:

  https://www.qemu.org/docs/master/about/deprecated.html

With regards,
Daniel
Nathan Chancellor Feb. 8, 2023, 7:10 p.m. UTC | #10
On Wed, Feb 08, 2023 at 11:14:23AM -0700, Nathan Chancellor wrote:
> On Wed, Feb 08, 2023 at 03:08:35PM -0300, Jason A. Donenfeld wrote:
> > All attempts at providing setup_data have been made as an iteration on
> > whatever was there before, stretching back to the original
> > implementation used for DTBs that [mis]used the kernel image itself.
> > We've now had a dozen rounds of bugs and hacks, and the result is
> > turning into a pile of unmaintainable and increasingly brittle hacks.
> > 
> > Let's just rip out all the madness and start over. We can re-architect
> > this based on having a separate standalone setup_data file, which is how
> > it should have been done in the first place. This is a larger project
> > with a few things to coordinate, but we can't really begin thinking
> > about that while trying to play whack-a-mole with the current buggy
> > implementation.
> > 
> > So this commit removes the setup_data setting from x86_load_linux(),
> > while leaving intact the infrastructure we'll need in the future to try
> > again.
> > 
> > Cc: Michael S. Tsirkin <mst@redhat.com>
> > Cc: Dov Murik <dovmurik@linux.ibm.com>
> > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > Cc: Daniel P. Berrangé <berrange@redhat.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Richard Henderson <richard.henderson@linaro.org>
> > Cc: H. Peter Anvin <hpa@zytor.com>
> > Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> > Cc: Nathan Chancellor <nathan@kernel.org>
> > Cc: Borislav Petkov <bp@alien8.de>
> > Cc: Eric Biggers <ebiggers@kernel.org>
> > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> 
> Thanks for the quick patch! Both of my use cases appear fixed.
> 
> Tested-by: Nathan Chancellor <nathan@kernel.org>

Unfortunately, I only tested qemu-system-x86_64 booting via EFI and just now came to
find out this breaks booting without EFI in both qemu-system-i386 and
qemu-system-x86_64 (just defconfig for both images):

$ qemu-system-i386 \
    -d unimp,guest_errors \
    -kernel arch/x86/boot/bzImage \
    -append "console=ttyS0 earlycon=uart8250,io,0x3f8" \
    -cpu host \
    -enable-kvm \
    -smp 8 \
    -display none \
    -initrd .../boot-utils-ro/images/x86/rootfs.cpio \
    -m 512m \
    -nodefaults \
    -no-reboot \
    -serial mon:stdio

There is no output, which I suppose makes sense since we are very early
in boot. Sorry for missing this initially, I'll ramp up my testing for
future patches.

Cheers,
Nathan

> > ---
> >  hw/i386/microvm.c |  15 ++----
> >  hw/i386/x86.c     | 120 +++++-----------------------------------------
> >  2 files changed, 17 insertions(+), 118 deletions(-)
> > 
> > diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
> > index 29f30dd6d3..170a331e3f 100644
> > --- a/hw/i386/microvm.c
> > +++ b/hw/i386/microvm.c
> > @@ -378,8 +378,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
> >      MicrovmMachineState *mms = MICROVM_MACHINE(machine);
> >      BusState *bus;
> >      BusChild *kid;
> > -    char *cmdline, *existing_cmdline;
> > -    size_t len;
> > +    char *cmdline;
> >  
> >      /*
> >       * Find MMIO transports with attached devices, and add them to the kernel
> > @@ -388,8 +387,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
> >       * Yes, this is a hack, but one that heavily improves the UX without
> >       * introducing any significant issues.
> >       */
> > -    existing_cmdline = fw_cfg_read_bytes_ptr(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA);
> > -    cmdline = g_strdup(existing_cmdline);
> > +    cmdline = g_strdup(machine->kernel_cmdline);
> >      bus = sysbus_get_default();
> >      QTAILQ_FOREACH(kid, &bus->children, sibling) {
> >          DeviceState *dev = kid->child;
> > @@ -413,12 +411,9 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
> >          }
> >      }
> >  
> > -    len = strlen(cmdline);
> > -    if (len > VIRTIO_CMDLINE_TOTAL_MAX_LEN + strlen(existing_cmdline)) {
> > -        fprintf(stderr, "qemu: virtio mmio cmdline too large, skipping\n");
> > -    } else {
> > -        memcpy(existing_cmdline, cmdline, len + 1);
> > -    }
> > +    fw_cfg_modify_i32(x86ms->fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(cmdline) + 1);
> > +    fw_cfg_modify_string(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA, cmdline);
> > +
> >      g_free(cmdline);
> >  }
> >  
> > diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> > index eaff4227bd..6cfdca9acd 100644
> > --- a/hw/i386/x86.c
> > +++ b/hw/i386/x86.c
> > @@ -50,7 +50,6 @@
> >  #include "hw/intc/i8259.h"
> >  #include "hw/rtc/mc146818rtc.h"
> >  #include "target/i386/sev.h"
> > -#include "hw/i386/microvm.h"
> >  
> >  #include "hw/acpi/cpu_hotplug.h"
> >  #include "hw/irq.h"
> > @@ -770,30 +769,6 @@ static bool load_elfboot(const char *kernel_filename,
> >      return true;
> >  }
> >  
> > -typedef struct SetupDataFixup {
> > -    void *pos;
> > -    hwaddr orig_val, new_val;
> > -    uint32_t addr;
> > -} SetupDataFixup;
> > -
> > -static void fixup_setup_data(void *opaque)
> > -{
> > -    SetupDataFixup *fixup = opaque;
> > -    stq_p(fixup->pos, fixup->new_val);
> > -}
> > -
> > -static void reset_setup_data(void *opaque)
> > -{
> > -    SetupDataFixup *fixup = opaque;
> > -    stq_p(fixup->pos, fixup->orig_val);
> > -}
> > -
> > -static void reset_rng_seed(void *opaque)
> > -{
> > -    SetupData *setup_data = opaque;
> > -    qemu_guest_getrandom_nofail(setup_data->data, le32_to_cpu(setup_data->len));
> > -}
> > -
> >  void x86_load_linux(X86MachineState *x86ms,
> >                      FWCfgState *fw_cfg,
> >                      int acpi_data_size,
> > @@ -803,29 +778,20 @@ void x86_load_linux(X86MachineState *x86ms,
> >      bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled;
> >      uint16_t protocol;
> >      int setup_size, kernel_size, cmdline_size;
> > -    int dtb_size, setup_data_offset;
> >      uint32_t initrd_max;
> >      uint8_t header[8192], *setup, *kernel;
> > -    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0;
> > +    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
> >      FILE *f;
> >      char *vmode;
> >      MachineState *machine = MACHINE(x86ms);
> > -    SetupData *setup_data;
> >      const char *kernel_filename = machine->kernel_filename;
> >      const char *initrd_filename = machine->initrd_filename;
> > -    const char *dtb_filename = machine->dtb;
> > -    char *kernel_cmdline;
> > +    const char *kernel_cmdline = machine->kernel_cmdline;
> >      SevKernelLoaderContext sev_load_ctx = {};
> >      enum { RNG_SEED_LENGTH = 32 };
> >  
> > -    /*
> > -     * Add the NUL terminator, some padding for the microvm cmdline fiddling
> > -     * hack, and then align to 16 bytes as a paranoia measure
> > -     */
> > -    cmdline_size = (strlen(machine->kernel_cmdline) + 1 +
> > -                    VIRTIO_CMDLINE_TOTAL_MAX_LEN + 16) & ~15;
> > -    /* Make a copy, since we might append arbitrary bytes to it later. */
> > -    kernel_cmdline = g_strndup(machine->kernel_cmdline, cmdline_size);
> > +    /* Align to 16 bytes as a paranoia measure */
> > +    cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
> >  
> >      /* load the kernel header */
> >      f = fopen(kernel_filename, "rb");
> > @@ -966,6 +932,12 @@ void x86_load_linux(X86MachineState *x86ms,
> >          initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1;
> >      }
> >  
> > +    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
> > +    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1);
> > +    fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
> > +    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
> > +    sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1;
> > +
> >      if (protocol >= 0x202) {
> >          stl_p(header + 0x228, cmdline_addr);
> >      } else {
> > @@ -1078,81 +1050,13 @@ void x86_load_linux(X86MachineState *x86ms,
> >      }
> >      fclose(f);
> >  
> > -    /* append dtb to kernel */
> > -    if (dtb_filename) {
> > -        if (protocol < 0x209) {
> > -            fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n");
> > -            exit(1);
> > -        }
> > -
> > -        dtb_size = get_image_size(dtb_filename);
> > -        if (dtb_size <= 0) {
> > -            fprintf(stderr, "qemu: error reading dtb %s: %s\n",
> > -                    dtb_filename, strerror(errno));
> > -            exit(1);
> > -        }
> > -
> > -        setup_data_offset = cmdline_size;
> > -        cmdline_size += sizeof(SetupData) + dtb_size;
> > -        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
> > -        setup_data = (void *)kernel_cmdline + setup_data_offset;
> > -        setup_data->next = cpu_to_le64(first_setup_data);
> > -        first_setup_data = cmdline_addr + setup_data_offset;
> > -        setup_data->type = cpu_to_le32(SETUP_DTB);
> > -        setup_data->len = cpu_to_le32(dtb_size);
> > -        load_image_size(dtb_filename, setup_data->data, dtb_size);
> > -    }
> > -
> > -    if (!legacy_no_rng_seed && protocol >= 0x209) {
> > -        setup_data_offset = cmdline_size;
> > -        cmdline_size += sizeof(SetupData) + RNG_SEED_LENGTH;
> > -        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
> > -        setup_data = (void *)kernel_cmdline + setup_data_offset;
> > -        setup_data->next = cpu_to_le64(first_setup_data);
> > -        first_setup_data = cmdline_addr + setup_data_offset;
> > -        setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
> > -        setup_data->len = cpu_to_le32(RNG_SEED_LENGTH);
> > -        qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
> > -        qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data);
> > -        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL,
> > -                                  setup_data, kernel, kernel_size, true);
> > -    } else {
> > -        fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
> > -    }
> > -
> > -    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
> > -    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, cmdline_size);
> > -    fw_cfg_add_bytes(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline, cmdline_size);
> > -    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
> > -    sev_load_ctx.cmdline_size = cmdline_size;
> > -
> > +    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
> >      fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
> >      fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
> >      sev_load_ctx.kernel_data = (char *)kernel;
> >      sev_load_ctx.kernel_size = kernel_size;
> >  
> > -    /*
> > -     * If we're starting an encrypted VM, it will be OVMF based, which uses the
> > -     * efi stub for booting and doesn't require any values to be placed in the
> > -     * kernel header.  We therefore don't update the header so the hash of the
> > -     * kernel on the other side of the fw_cfg interface matches the hash of the
> > -     * file the user passed in.
> > -     */
> > -    if (!sev_enabled() && first_setup_data) {
> > -        SetupDataFixup *fixup = g_malloc(sizeof(*fixup));
> > -
> > -        memcpy(setup, header, MIN(sizeof(header), setup_size));
> > -        /* Offset 0x250 is a pointer to the first setup_data link. */
> > -        fixup->pos = setup + 0x250;
> > -        fixup->orig_val = ldq_p(fixup->pos);
> > -        fixup->new_val = first_setup_data;
> > -        fixup->addr = cpu_to_le32(real_addr);
> > -        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL,
> > -                                  fixup, &fixup->addr, sizeof(fixup->addr), true);
> > -        qemu_register_reset(reset_setup_data, fixup);
> > -    } else {
> > -        fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
> > -    }
> > +    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
> >      fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
> >      fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
> >      sev_load_ctx.setup_data = (char *)setup;
> > -- 
> > 2.39.1
> >
Dov Murik Feb. 8, 2023, 7:34 p.m. UTC | #11
On 08/02/2023 20:08, Jason A. Donenfeld wrote:
> All attempts at providing setup_data have been made as an iteration on
> whatever was there before, stretching back to the original
> implementation used for DTBs that [mis]used the kernel image itself.
> We've now had a dozen rounds of bugs and hacks, and the result is
> turning into a pile of unmaintainable and increasingly brittle hacks.
> 
> Let's just rip out all the madness and start over. We can re-architect
> this based on having a separate standalone setup_data file, which is how
> it should have been done in the first place. This is a larger project
> with a few things to coordinate, but we can't really begin thinking
> about that while trying to play whack-a-mole with the current buggy
> implementation.
> 
> So this commit removes the setup_data setting from x86_load_linux(),
> while leaving intact the infrastructure we'll need in the future to try
> again.
> 
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Dov Murik <dovmurik@linux.ibm.com>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: Daniel P. Berrangé <berrange@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Richard Henderson <richard.henderson@linaro.org>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> Cc: Nathan Chancellor <nathan@kernel.org>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: Eric Biggers <ebiggers@kernel.org>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>

I understand from other replies that this patch might have some issues,
but I still tested it for SEV measured boot (OVMF AmdSev build).  This
patch fixes the issue and allows booting correctly.

I also verified that the calculated hashes match the ones calculated by
qemu 7.1.0.

So this fixes the previous issues:
* qemu 7.2.0 produces wrong kernel hash.
* qemu master (commit 6661b8c7fe) produces wrong cmdline hash.

-Dov


> ---
>  hw/i386/microvm.c |  15 ++----
>  hw/i386/x86.c     | 120 +++++-----------------------------------------
>  2 files changed, 17 insertions(+), 118 deletions(-)
> 
> diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
> index 29f30dd6d3..170a331e3f 100644
> --- a/hw/i386/microvm.c
> +++ b/hw/i386/microvm.c
> @@ -378,8 +378,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
>      MicrovmMachineState *mms = MICROVM_MACHINE(machine);
>      BusState *bus;
>      BusChild *kid;
> -    char *cmdline, *existing_cmdline;
> -    size_t len;
> +    char *cmdline;
>  
>      /*
>       * Find MMIO transports with attached devices, and add them to the kernel
> @@ -388,8 +387,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
>       * Yes, this is a hack, but one that heavily improves the UX without
>       * introducing any significant issues.
>       */
> -    existing_cmdline = fw_cfg_read_bytes_ptr(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA);
> -    cmdline = g_strdup(existing_cmdline);
> +    cmdline = g_strdup(machine->kernel_cmdline);
>      bus = sysbus_get_default();
>      QTAILQ_FOREACH(kid, &bus->children, sibling) {
>          DeviceState *dev = kid->child;
> @@ -413,12 +411,9 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
>          }
>      }
>  
> -    len = strlen(cmdline);
> -    if (len > VIRTIO_CMDLINE_TOTAL_MAX_LEN + strlen(existing_cmdline)) {
> -        fprintf(stderr, "qemu: virtio mmio cmdline too large, skipping\n");
> -    } else {
> -        memcpy(existing_cmdline, cmdline, len + 1);
> -    }
> +    fw_cfg_modify_i32(x86ms->fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(cmdline) + 1);
> +    fw_cfg_modify_string(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA, cmdline);
> +
>      g_free(cmdline);
>  }
>  
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index eaff4227bd..6cfdca9acd 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -50,7 +50,6 @@
>  #include "hw/intc/i8259.h"
>  #include "hw/rtc/mc146818rtc.h"
>  #include "target/i386/sev.h"
> -#include "hw/i386/microvm.h"
>  
>  #include "hw/acpi/cpu_hotplug.h"
>  #include "hw/irq.h"
> @@ -770,30 +769,6 @@ static bool load_elfboot(const char *kernel_filename,
>      return true;
>  }
>  
> -typedef struct SetupDataFixup {
> -    void *pos;
> -    hwaddr orig_val, new_val;
> -    uint32_t addr;
> -} SetupDataFixup;
> -
> -static void fixup_setup_data(void *opaque)
> -{
> -    SetupDataFixup *fixup = opaque;
> -    stq_p(fixup->pos, fixup->new_val);
> -}
> -
> -static void reset_setup_data(void *opaque)
> -{
> -    SetupDataFixup *fixup = opaque;
> -    stq_p(fixup->pos, fixup->orig_val);
> -}
> -
> -static void reset_rng_seed(void *opaque)
> -{
> -    SetupData *setup_data = opaque;
> -    qemu_guest_getrandom_nofail(setup_data->data, le32_to_cpu(setup_data->len));
> -}
> -
>  void x86_load_linux(X86MachineState *x86ms,
>                      FWCfgState *fw_cfg,
>                      int acpi_data_size,
> @@ -803,29 +778,20 @@ void x86_load_linux(X86MachineState *x86ms,
>      bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled;
>      uint16_t protocol;
>      int setup_size, kernel_size, cmdline_size;
> -    int dtb_size, setup_data_offset;
>      uint32_t initrd_max;
>      uint8_t header[8192], *setup, *kernel;
> -    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0;
> +    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
>      FILE *f;
>      char *vmode;
>      MachineState *machine = MACHINE(x86ms);
> -    SetupData *setup_data;
>      const char *kernel_filename = machine->kernel_filename;
>      const char *initrd_filename = machine->initrd_filename;
> -    const char *dtb_filename = machine->dtb;
> -    char *kernel_cmdline;
> +    const char *kernel_cmdline = machine->kernel_cmdline;
>      SevKernelLoaderContext sev_load_ctx = {};
>      enum { RNG_SEED_LENGTH = 32 };
>  
> -    /*
> -     * Add the NUL terminator, some padding for the microvm cmdline fiddling
> -     * hack, and then align to 16 bytes as a paranoia measure
> -     */
> -    cmdline_size = (strlen(machine->kernel_cmdline) + 1 +
> -                    VIRTIO_CMDLINE_TOTAL_MAX_LEN + 16) & ~15;
> -    /* Make a copy, since we might append arbitrary bytes to it later. */
> -    kernel_cmdline = g_strndup(machine->kernel_cmdline, cmdline_size);
> +    /* Align to 16 bytes as a paranoia measure */
> +    cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
>  
>      /* load the kernel header */
>      f = fopen(kernel_filename, "rb");
> @@ -966,6 +932,12 @@ void x86_load_linux(X86MachineState *x86ms,
>          initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1;
>      }
>  
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1);
> +    fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
> +    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
> +    sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1;
> +
>      if (protocol >= 0x202) {
>          stl_p(header + 0x228, cmdline_addr);
>      } else {
> @@ -1078,81 +1050,13 @@ void x86_load_linux(X86MachineState *x86ms,
>      }
>      fclose(f);
>  
> -    /* append dtb to kernel */
> -    if (dtb_filename) {
> -        if (protocol < 0x209) {
> -            fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n");
> -            exit(1);
> -        }
> -
> -        dtb_size = get_image_size(dtb_filename);
> -        if (dtb_size <= 0) {
> -            fprintf(stderr, "qemu: error reading dtb %s: %s\n",
> -                    dtb_filename, strerror(errno));
> -            exit(1);
> -        }
> -
> -        setup_data_offset = cmdline_size;
> -        cmdline_size += sizeof(SetupData) + dtb_size;
> -        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
> -        setup_data = (void *)kernel_cmdline + setup_data_offset;
> -        setup_data->next = cpu_to_le64(first_setup_data);
> -        first_setup_data = cmdline_addr + setup_data_offset;
> -        setup_data->type = cpu_to_le32(SETUP_DTB);
> -        setup_data->len = cpu_to_le32(dtb_size);
> -        load_image_size(dtb_filename, setup_data->data, dtb_size);
> -    }
> -
> -    if (!legacy_no_rng_seed && protocol >= 0x209) {
> -        setup_data_offset = cmdline_size;
> -        cmdline_size += sizeof(SetupData) + RNG_SEED_LENGTH;
> -        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
> -        setup_data = (void *)kernel_cmdline + setup_data_offset;
> -        setup_data->next = cpu_to_le64(first_setup_data);
> -        first_setup_data = cmdline_addr + setup_data_offset;
> -        setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
> -        setup_data->len = cpu_to_le32(RNG_SEED_LENGTH);
> -        qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
> -        qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data);
> -        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL,
> -                                  setup_data, kernel, kernel_size, true);
> -    } else {
> -        fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
> -    }
> -
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, cmdline_size);
> -    fw_cfg_add_bytes(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline, cmdline_size);
> -    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
> -    sev_load_ctx.cmdline_size = cmdline_size;
> -
> +    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
>      sev_load_ctx.kernel_data = (char *)kernel;
>      sev_load_ctx.kernel_size = kernel_size;
>  
> -    /*
> -     * If we're starting an encrypted VM, it will be OVMF based, which uses the
> -     * efi stub for booting and doesn't require any values to be placed in the
> -     * kernel header.  We therefore don't update the header so the hash of the
> -     * kernel on the other side of the fw_cfg interface matches the hash of the
> -     * file the user passed in.
> -     */
> -    if (!sev_enabled() && first_setup_data) {
> -        SetupDataFixup *fixup = g_malloc(sizeof(*fixup));
> -
> -        memcpy(setup, header, MIN(sizeof(header), setup_size));
> -        /* Offset 0x250 is a pointer to the first setup_data link. */
> -        fixup->pos = setup + 0x250;
> -        fixup->orig_val = ldq_p(fixup->pos);
> -        fixup->new_val = first_setup_data;
> -        fixup->addr = cpu_to_le32(real_addr);
> -        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL,
> -                                  fixup, &fixup->addr, sizeof(fixup->addr), true);
> -        qemu_register_reset(reset_setup_data, fixup);
> -    } else {
> -        fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
> -    }
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
>      sev_load_ctx.setup_data = (char *)setup;
Michael S. Tsirkin Feb. 8, 2023, 8:43 p.m. UTC | #12
On Wed, Feb 08, 2023 at 03:08:35PM -0300, Jason A. Donenfeld wrote:
> All attempts at providing setup_data have been made as an iteration on
> whatever was there before, stretching back to the original
> implementation used for DTBs that [mis]used the kernel image itself.
> We've now had a dozen rounds of bugs and hacks, and the result is
> turning into a pile of unmaintainable and increasingly brittle hacks.
> 
> Let's just rip out all the madness and start over. We can re-architect
> this based on having a separate standalone setup_data file, which is how
> it should have been done in the first place. This is a larger project
> with a few things to coordinate, but we can't really begin thinking
> about that while trying to play whack-a-mole with the current buggy
> implementation.
> 
> So this commit removes the setup_data setting from x86_load_linux(),
> while leaving intact the infrastructure we'll need in the future to try
> again.
> 
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Dov Murik <dovmurik@linux.ibm.com>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: Daniel P. Berrangé <berrange@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Richard Henderson <richard.henderson@linaro.org>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> Cc: Nathan Chancellor <nathan@kernel.org>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: Eric Biggers <ebiggers@kernel.org>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>

So I think we have:

Fixes: 67f7e426e5 ("hw/i386: pass RNG seed via setup_data entry")
Fixes: eac7a7791b ("x86: don't let decompressed kernel image clobber setup_data")
Fixes: 14b29fea74 ("x86: do not re-randomize RNG seed on snapshot load")
Fixes: cc63374a5a ("x86: re-initialize RNG seed when selecting kernel")
Fixes: 763a2828bf ("x86: reinitialize RNG seed on system reboot")
Fixes: eebb38a563 ("x86: use typedef for SetupData struct")
Fixes: e935b73508 ("x86: return modified setup_data only if read as memory, not as file")



> ---
>  hw/i386/microvm.c |  15 ++----
>  hw/i386/x86.c     | 120 +++++-----------------------------------------
>  2 files changed, 17 insertions(+), 118 deletions(-)
> 
> diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
> index 29f30dd6d3..170a331e3f 100644
> --- a/hw/i386/microvm.c
> +++ b/hw/i386/microvm.c
> @@ -378,8 +378,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
>      MicrovmMachineState *mms = MICROVM_MACHINE(machine);
>      BusState *bus;
>      BusChild *kid;
> -    char *cmdline, *existing_cmdline;
> -    size_t len;
> +    char *cmdline;
>  
>      /*
>       * Find MMIO transports with attached devices, and add them to the kernel
> @@ -388,8 +387,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
>       * Yes, this is a hack, but one that heavily improves the UX without
>       * introducing any significant issues.
>       */
> -    existing_cmdline = fw_cfg_read_bytes_ptr(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA);
> -    cmdline = g_strdup(existing_cmdline);
> +    cmdline = g_strdup(machine->kernel_cmdline);
>      bus = sysbus_get_default();
>      QTAILQ_FOREACH(kid, &bus->children, sibling) {
>          DeviceState *dev = kid->child;
> @@ -413,12 +411,9 @@ static void microvm_fix_kernel_cmdline(MachineState *machine)
>          }
>      }
>  
> -    len = strlen(cmdline);
> -    if (len > VIRTIO_CMDLINE_TOTAL_MAX_LEN + strlen(existing_cmdline)) {
> -        fprintf(stderr, "qemu: virtio mmio cmdline too large, skipping\n");
> -    } else {
> -        memcpy(existing_cmdline, cmdline, len + 1);
> -    }
> +    fw_cfg_modify_i32(x86ms->fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(cmdline) + 1);
> +    fw_cfg_modify_string(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA, cmdline);
> +
>      g_free(cmdline);
>  }
>  
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index eaff4227bd..6cfdca9acd 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -50,7 +50,6 @@
>  #include "hw/intc/i8259.h"
>  #include "hw/rtc/mc146818rtc.h"
>  #include "target/i386/sev.h"
> -#include "hw/i386/microvm.h"
>  
>  #include "hw/acpi/cpu_hotplug.h"
>  #include "hw/irq.h"
> @@ -770,30 +769,6 @@ static bool load_elfboot(const char *kernel_filename,
>      return true;
>  }
>  
> -typedef struct SetupDataFixup {
> -    void *pos;
> -    hwaddr orig_val, new_val;
> -    uint32_t addr;
> -} SetupDataFixup;
> -
> -static void fixup_setup_data(void *opaque)
> -{
> -    SetupDataFixup *fixup = opaque;
> -    stq_p(fixup->pos, fixup->new_val);
> -}
> -
> -static void reset_setup_data(void *opaque)
> -{
> -    SetupDataFixup *fixup = opaque;
> -    stq_p(fixup->pos, fixup->orig_val);
> -}
> -
> -static void reset_rng_seed(void *opaque)
> -{
> -    SetupData *setup_data = opaque;
> -    qemu_guest_getrandom_nofail(setup_data->data, le32_to_cpu(setup_data->len));
> -}
> -
>  void x86_load_linux(X86MachineState *x86ms,
>                      FWCfgState *fw_cfg,
>                      int acpi_data_size,
> @@ -803,29 +778,20 @@ void x86_load_linux(X86MachineState *x86ms,
>      bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled;
>      uint16_t protocol;
>      int setup_size, kernel_size, cmdline_size;
> -    int dtb_size, setup_data_offset;
>      uint32_t initrd_max;
>      uint8_t header[8192], *setup, *kernel;
> -    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0;
> +    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
>      FILE *f;
>      char *vmode;
>      MachineState *machine = MACHINE(x86ms);
> -    SetupData *setup_data;
>      const char *kernel_filename = machine->kernel_filename;
>      const char *initrd_filename = machine->initrd_filename;
> -    const char *dtb_filename = machine->dtb;
> -    char *kernel_cmdline;
> +    const char *kernel_cmdline = machine->kernel_cmdline;
>      SevKernelLoaderContext sev_load_ctx = {};
>      enum { RNG_SEED_LENGTH = 32 };
>  
> -    /*
> -     * Add the NUL terminator, some padding for the microvm cmdline fiddling
> -     * hack, and then align to 16 bytes as a paranoia measure
> -     */
> -    cmdline_size = (strlen(machine->kernel_cmdline) + 1 +
> -                    VIRTIO_CMDLINE_TOTAL_MAX_LEN + 16) & ~15;
> -    /* Make a copy, since we might append arbitrary bytes to it later. */
> -    kernel_cmdline = g_strndup(machine->kernel_cmdline, cmdline_size);
> +    /* Align to 16 bytes as a paranoia measure */
> +    cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
>  
>      /* load the kernel header */
>      f = fopen(kernel_filename, "rb");
> @@ -966,6 +932,12 @@ void x86_load_linux(X86MachineState *x86ms,
>          initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1;
>      }
>  
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1);
> +    fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
> +    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
> +    sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1;
> +
>      if (protocol >= 0x202) {
>          stl_p(header + 0x228, cmdline_addr);
>      } else {
> @@ -1078,81 +1050,13 @@ void x86_load_linux(X86MachineState *x86ms,
>      }
>      fclose(f);
>  
> -    /* append dtb to kernel */
> -    if (dtb_filename) {
> -        if (protocol < 0x209) {
> -            fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n");
> -            exit(1);
> -        }
> -
> -        dtb_size = get_image_size(dtb_filename);
> -        if (dtb_size <= 0) {
> -            fprintf(stderr, "qemu: error reading dtb %s: %s\n",
> -                    dtb_filename, strerror(errno));
> -            exit(1);
> -        }
> -
> -        setup_data_offset = cmdline_size;
> -        cmdline_size += sizeof(SetupData) + dtb_size;
> -        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
> -        setup_data = (void *)kernel_cmdline + setup_data_offset;
> -        setup_data->next = cpu_to_le64(first_setup_data);
> -        first_setup_data = cmdline_addr + setup_data_offset;
> -        setup_data->type = cpu_to_le32(SETUP_DTB);
> -        setup_data->len = cpu_to_le32(dtb_size);
> -        load_image_size(dtb_filename, setup_data->data, dtb_size);
> -    }
> -
> -    if (!legacy_no_rng_seed && protocol >= 0x209) {
> -        setup_data_offset = cmdline_size;
> -        cmdline_size += sizeof(SetupData) + RNG_SEED_LENGTH;
> -        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
> -        setup_data = (void *)kernel_cmdline + setup_data_offset;
> -        setup_data->next = cpu_to_le64(first_setup_data);
> -        first_setup_data = cmdline_addr + setup_data_offset;
> -        setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
> -        setup_data->len = cpu_to_le32(RNG_SEED_LENGTH);
> -        qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
> -        qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data);
> -        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL,
> -                                  setup_data, kernel, kernel_size, true);
> -    } else {
> -        fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
> -    }
> -
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, cmdline_size);
> -    fw_cfg_add_bytes(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline, cmdline_size);
> -    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
> -    sev_load_ctx.cmdline_size = cmdline_size;
> -
> +    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
>      sev_load_ctx.kernel_data = (char *)kernel;
>      sev_load_ctx.kernel_size = kernel_size;
>  
> -    /*
> -     * If we're starting an encrypted VM, it will be OVMF based, which uses the
> -     * efi stub for booting and doesn't require any values to be placed in the
> -     * kernel header.  We therefore don't update the header so the hash of the
> -     * kernel on the other side of the fw_cfg interface matches the hash of the
> -     * file the user passed in.
> -     */
> -    if (!sev_enabled() && first_setup_data) {
> -        SetupDataFixup *fixup = g_malloc(sizeof(*fixup));
> -
> -        memcpy(setup, header, MIN(sizeof(header), setup_size));
> -        /* Offset 0x250 is a pointer to the first setup_data link. */
> -        fixup->pos = setup + 0x250;
> -        fixup->orig_val = ldq_p(fixup->pos);
> -        fixup->new_val = first_setup_data;
> -        fixup->addr = cpu_to_le32(real_addr);
> -        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL,
> -                                  fixup, &fixup->addr, sizeof(fixup->addr), true);
> -        qemu_register_reset(reset_setup_data, fixup);
> -    } else {
> -        fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
> -    }
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
>      sev_load_ctx.setup_data = (char *)setup;
> -- 
> 2.39.1
diff mbox series

Patch

diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 29f30dd6d3..170a331e3f 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -378,8 +378,7 @@  static void microvm_fix_kernel_cmdline(MachineState *machine)
     MicrovmMachineState *mms = MICROVM_MACHINE(machine);
     BusState *bus;
     BusChild *kid;
-    char *cmdline, *existing_cmdline;
-    size_t len;
+    char *cmdline;
 
     /*
      * Find MMIO transports with attached devices, and add them to the kernel
@@ -388,8 +387,7 @@  static void microvm_fix_kernel_cmdline(MachineState *machine)
      * Yes, this is a hack, but one that heavily improves the UX without
      * introducing any significant issues.
      */
-    existing_cmdline = fw_cfg_read_bytes_ptr(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA);
-    cmdline = g_strdup(existing_cmdline);
+    cmdline = g_strdup(machine->kernel_cmdline);
     bus = sysbus_get_default();
     QTAILQ_FOREACH(kid, &bus->children, sibling) {
         DeviceState *dev = kid->child;
@@ -413,12 +411,9 @@  static void microvm_fix_kernel_cmdline(MachineState *machine)
         }
     }
 
-    len = strlen(cmdline);
-    if (len > VIRTIO_CMDLINE_TOTAL_MAX_LEN + strlen(existing_cmdline)) {
-        fprintf(stderr, "qemu: virtio mmio cmdline too large, skipping\n");
-    } else {
-        memcpy(existing_cmdline, cmdline, len + 1);
-    }
+    fw_cfg_modify_i32(x86ms->fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(cmdline) + 1);
+    fw_cfg_modify_string(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA, cmdline);
+
     g_free(cmdline);
 }
 
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index eaff4227bd..6cfdca9acd 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -50,7 +50,6 @@ 
 #include "hw/intc/i8259.h"
 #include "hw/rtc/mc146818rtc.h"
 #include "target/i386/sev.h"
-#include "hw/i386/microvm.h"
 
 #include "hw/acpi/cpu_hotplug.h"
 #include "hw/irq.h"
@@ -770,30 +769,6 @@  static bool load_elfboot(const char *kernel_filename,
     return true;
 }
 
-typedef struct SetupDataFixup {
-    void *pos;
-    hwaddr orig_val, new_val;
-    uint32_t addr;
-} SetupDataFixup;
-
-static void fixup_setup_data(void *opaque)
-{
-    SetupDataFixup *fixup = opaque;
-    stq_p(fixup->pos, fixup->new_val);
-}
-
-static void reset_setup_data(void *opaque)
-{
-    SetupDataFixup *fixup = opaque;
-    stq_p(fixup->pos, fixup->orig_val);
-}
-
-static void reset_rng_seed(void *opaque)
-{
-    SetupData *setup_data = opaque;
-    qemu_guest_getrandom_nofail(setup_data->data, le32_to_cpu(setup_data->len));
-}
-
 void x86_load_linux(X86MachineState *x86ms,
                     FWCfgState *fw_cfg,
                     int acpi_data_size,
@@ -803,29 +778,20 @@  void x86_load_linux(X86MachineState *x86ms,
     bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled;
     uint16_t protocol;
     int setup_size, kernel_size, cmdline_size;
-    int dtb_size, setup_data_offset;
     uint32_t initrd_max;
     uint8_t header[8192], *setup, *kernel;
-    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0;
+    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
     FILE *f;
     char *vmode;
     MachineState *machine = MACHINE(x86ms);
-    SetupData *setup_data;
     const char *kernel_filename = machine->kernel_filename;
     const char *initrd_filename = machine->initrd_filename;
-    const char *dtb_filename = machine->dtb;
-    char *kernel_cmdline;
+    const char *kernel_cmdline = machine->kernel_cmdline;
     SevKernelLoaderContext sev_load_ctx = {};
     enum { RNG_SEED_LENGTH = 32 };
 
-    /*
-     * Add the NUL terminator, some padding for the microvm cmdline fiddling
-     * hack, and then align to 16 bytes as a paranoia measure
-     */
-    cmdline_size = (strlen(machine->kernel_cmdline) + 1 +
-                    VIRTIO_CMDLINE_TOTAL_MAX_LEN + 16) & ~15;
-    /* Make a copy, since we might append arbitrary bytes to it later. */
-    kernel_cmdline = g_strndup(machine->kernel_cmdline, cmdline_size);
+    /* Align to 16 bytes as a paranoia measure */
+    cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
 
     /* load the kernel header */
     f = fopen(kernel_filename, "rb");
@@ -966,6 +932,12 @@  void x86_load_linux(X86MachineState *x86ms,
         initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1;
     }
 
+    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1);
+    fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
+    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
+    sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1;
+
     if (protocol >= 0x202) {
         stl_p(header + 0x228, cmdline_addr);
     } else {
@@ -1078,81 +1050,13 @@  void x86_load_linux(X86MachineState *x86ms,
     }
     fclose(f);
 
-    /* append dtb to kernel */
-    if (dtb_filename) {
-        if (protocol < 0x209) {
-            fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n");
-            exit(1);
-        }
-
-        dtb_size = get_image_size(dtb_filename);
-        if (dtb_size <= 0) {
-            fprintf(stderr, "qemu: error reading dtb %s: %s\n",
-                    dtb_filename, strerror(errno));
-            exit(1);
-        }
-
-        setup_data_offset = cmdline_size;
-        cmdline_size += sizeof(SetupData) + dtb_size;
-        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
-        setup_data = (void *)kernel_cmdline + setup_data_offset;
-        setup_data->next = cpu_to_le64(first_setup_data);
-        first_setup_data = cmdline_addr + setup_data_offset;
-        setup_data->type = cpu_to_le32(SETUP_DTB);
-        setup_data->len = cpu_to_le32(dtb_size);
-        load_image_size(dtb_filename, setup_data->data, dtb_size);
-    }
-
-    if (!legacy_no_rng_seed && protocol >= 0x209) {
-        setup_data_offset = cmdline_size;
-        cmdline_size += sizeof(SetupData) + RNG_SEED_LENGTH;
-        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
-        setup_data = (void *)kernel_cmdline + setup_data_offset;
-        setup_data->next = cpu_to_le64(first_setup_data);
-        first_setup_data = cmdline_addr + setup_data_offset;
-        setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
-        setup_data->len = cpu_to_le32(RNG_SEED_LENGTH);
-        qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
-        qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data);
-        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL,
-                                  setup_data, kernel, kernel_size, true);
-    } else {
-        fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
-    }
-
-    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
-    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, cmdline_size);
-    fw_cfg_add_bytes(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline, cmdline_size);
-    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
-    sev_load_ctx.cmdline_size = cmdline_size;
-
+    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
     fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
     fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
     sev_load_ctx.kernel_data = (char *)kernel;
     sev_load_ctx.kernel_size = kernel_size;
 
-    /*
-     * If we're starting an encrypted VM, it will be OVMF based, which uses the
-     * efi stub for booting and doesn't require any values to be placed in the
-     * kernel header.  We therefore don't update the header so the hash of the
-     * kernel on the other side of the fw_cfg interface matches the hash of the
-     * file the user passed in.
-     */
-    if (!sev_enabled() && first_setup_data) {
-        SetupDataFixup *fixup = g_malloc(sizeof(*fixup));
-
-        memcpy(setup, header, MIN(sizeof(header), setup_size));
-        /* Offset 0x250 is a pointer to the first setup_data link. */
-        fixup->pos = setup + 0x250;
-        fixup->orig_val = ldq_p(fixup->pos);
-        fixup->new_val = first_setup_data;
-        fixup->addr = cpu_to_le32(real_addr);
-        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL,
-                                  fixup, &fixup->addr, sizeof(fixup->addr), true);
-        qemu_register_reset(reset_setup_data, fixup);
-    } else {
-        fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
-    }
+    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
     fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
     fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
     sev_load_ctx.setup_data = (char *)setup;