diff mbox series

[v3,48/49] hw/i386/sev: Use guest_memfd for legacy ROMs

Message ID 20240320083945.991426-49-michael.roth@amd.com
State New
Headers show
Series Add AMD Secure Nested Paging (SEV-SNP) support | expand

Commit Message

Michael Roth March 20, 2024, 8:39 a.m. UTC
TODO: make this SNP-specific if TDX disables legacy ROMs in general

Current SNP guest kernels will attempt to access these regions with
with C-bit set, so guest_memfd is needed to handle that. Otherwise,
kvm_convert_memory() will fail when the guest kernel tries to access it
and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges
to private.

Whether guests should actually try to access ROM regions in this way (or
need to deal with legacy ROM regions at all), is a separate issue to be
addressed on kernel side, but current SNP guest kernels will exhibit
this behavior and so this handling is needed to allow QEMU to continue
running existing SNP guest kernels.

Signed-off-by: Michael Roth <michael.roth@amd.com>
---
 hw/i386/pc.c       | 13 +++++++++----
 hw/i386/pc_sysfw.c | 13 ++++++++++---
 2 files changed, 19 insertions(+), 7 deletions(-)

Comments

Isaku Yamahata March 20, 2024, 6:12 p.m. UTC | #1
On Wed, Mar 20, 2024 at 03:39:44AM -0500,
Michael Roth <michael.roth@amd.com> wrote:

> TODO: make this SNP-specific if TDX disables legacy ROMs in general

TDX disables pc.rom, not disable isa-bios. IIRC, TDX doesn't need pc pflash.
Xiaoyao can chime in.

Thanks,

> 
> Current SNP guest kernels will attempt to access these regions with
> with C-bit set, so guest_memfd is needed to handle that. Otherwise,
> kvm_convert_memory() will fail when the guest kernel tries to access it
> and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges
> to private.
> 
> Whether guests should actually try to access ROM regions in this way (or
> need to deal with legacy ROM regions at all), is a separate issue to be
> addressed on kernel side, but current SNP guest kernels will exhibit
> this behavior and so this handling is needed to allow QEMU to continue
> running existing SNP guest kernels.
> 
> Signed-off-by: Michael Roth <michael.roth@amd.com>
> ---
>  hw/i386/pc.c       | 13 +++++++++----
>  hw/i386/pc_sysfw.c | 13 ++++++++++---
>  2 files changed, 19 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index feb7a93083..5feaeb43ee 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1011,10 +1011,15 @@ void pc_memory_init(PCMachineState *pcms,
>      pc_system_firmware_init(pcms, rom_memory);
>  
>      option_rom_mr = g_malloc(sizeof(*option_rom_mr));
> -    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
> -                           &error_fatal);
> -    if (pcmc->pci_enabled) {
> -        memory_region_set_readonly(option_rom_mr, true);
> +    if (machine_require_guest_memfd(machine)) {
> +        memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
> +                                           PC_ROM_SIZE, &error_fatal);
> +    } else {
> +        memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
> +                               &error_fatal);
> +        if (pcmc->pci_enabled) {
> +            memory_region_set_readonly(option_rom_mr, true);
> +        }
>      }
>      memory_region_add_subregion_overlap(rom_memory,
>                                          PC_ROM_MIN_VGA,
> diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
> index 9dbb3f7337..850f86edd4 100644
> --- a/hw/i386/pc_sysfw.c
> +++ b/hw/i386/pc_sysfw.c
> @@ -54,8 +54,13 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
>      /* map the last 128KB of the BIOS in ISA space */
>      isa_bios_size = MIN(flash_size, 128 * KiB);
>      isa_bios = g_malloc(sizeof(*isa_bios));
> -    memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
> -                           &error_fatal);
> +    if (machine_require_guest_memfd(current_machine)) {
> +        memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios",
> +                                           isa_bios_size, &error_fatal);
> +    } else {
> +        memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
> +                               &error_fatal);
> +    }
>      memory_region_add_subregion_overlap(rom_memory,
>                                          0x100000 - isa_bios_size,
>                                          isa_bios,
> @@ -68,7 +73,9 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
>             ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size),
>             isa_bios_size);
>  
> -    memory_region_set_readonly(isa_bios, true);
> +    if (!machine_require_guest_memfd(current_machine)) {
> +        memory_region_set_readonly(isa_bios, true);
> +    }
>  }
>  
>  static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms,
> -- 
> 2.25.1
> 
>
Xiaoyao Li March 28, 2024, 12:45 a.m. UTC | #2
On 3/21/2024 2:12 AM, Isaku Yamahata wrote:
> On Wed, Mar 20, 2024 at 03:39:44AM -0500,
> Michael Roth <michael.roth@amd.com> wrote:
> 
>> TODO: make this SNP-specific if TDX disables legacy ROMs in general
> 
> TDX disables pc.rom, not disable isa-bios. IIRC, TDX doesn't need pc pflash.

Not TDX doesn't need pc pflash, but TDX cannot support pflash.

Can SNP support the behavior of pflash? That what's got changed will be 
synced back to OVMF file?

> Xiaoyao can chime in.
> 
> Thanks,
> 
>>
>> Current SNP guest kernels will attempt to access these regions with
>> with C-bit set, so guest_memfd is needed to handle that. Otherwise,
>> kvm_convert_memory() will fail when the guest kernel tries to access it
>> and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges
>> to private.
>>
>> Whether guests should actually try to access ROM regions in this way (or
>> need to deal with legacy ROM regions at all), is a separate issue to be
>> addressed on kernel side, but current SNP guest kernels will exhibit
>> this behavior and so this handling is needed to allow QEMU to continue
>> running existing SNP guest kernels.
>>
>> Signed-off-by: Michael Roth <michael.roth@amd.com>
>> ---
>>   hw/i386/pc.c       | 13 +++++++++----
>>   hw/i386/pc_sysfw.c | 13 ++++++++++---
>>   2 files changed, 19 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index feb7a93083..5feaeb43ee 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -1011,10 +1011,15 @@ void pc_memory_init(PCMachineState *pcms,
>>       pc_system_firmware_init(pcms, rom_memory);
>>   
>>       option_rom_mr = g_malloc(sizeof(*option_rom_mr));
>> -    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
>> -                           &error_fatal);
>> -    if (pcmc->pci_enabled) {
>> -        memory_region_set_readonly(option_rom_mr, true);
>> +    if (machine_require_guest_memfd(machine)) {
>> +        memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
>> +                                           PC_ROM_SIZE, &error_fatal);
>> +    } else {
>> +        memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
>> +                               &error_fatal);
>> +        if (pcmc->pci_enabled) {
>> +            memory_region_set_readonly(option_rom_mr, true);
>> +        }
>>       }
>>       memory_region_add_subregion_overlap(rom_memory,
>>                                           PC_ROM_MIN_VGA,
>> diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
>> index 9dbb3f7337..850f86edd4 100644
>> --- a/hw/i386/pc_sysfw.c
>> +++ b/hw/i386/pc_sysfw.c
>> @@ -54,8 +54,13 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
>>       /* map the last 128KB of the BIOS in ISA space */
>>       isa_bios_size = MIN(flash_size, 128 * KiB);
>>       isa_bios = g_malloc(sizeof(*isa_bios));
>> -    memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
>> -                           &error_fatal);
>> +    if (machine_require_guest_memfd(current_machine)) {
>> +        memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios",
>> +                                           isa_bios_size, &error_fatal);
>> +    } else {
>> +        memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
>> +                               &error_fatal);
>> +    }
>>       memory_region_add_subregion_overlap(rom_memory,
>>                                           0x100000 - isa_bios_size,
>>                                           isa_bios,
>> @@ -68,7 +73,9 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
>>              ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size),
>>              isa_bios_size);
>>   
>> -    memory_region_set_readonly(isa_bios, true);
>> +    if (!machine_require_guest_memfd(current_machine)) {
>> +        memory_region_set_readonly(isa_bios, true);
>> +    }
>>   }
>>   
>>   static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms,
>> -- 
>> 2.25.1
>>
>>
>
Michael Roth April 24, 2024, 12:08 a.m. UTC | #3
On Thu, Mar 28, 2024 at 08:45:03AM +0800, Xiaoyao Li wrote:
> On 3/21/2024 2:12 AM, Isaku Yamahata wrote:
> > On Wed, Mar 20, 2024 at 03:39:44AM -0500,
> > Michael Roth <michael.roth@amd.com> wrote:
> > 
> > > TODO: make this SNP-specific if TDX disables legacy ROMs in general
> > 
> > TDX disables pc.rom, not disable isa-bios. IIRC, TDX doesn't need pc pflash.
> 
> Not TDX doesn't need pc pflash, but TDX cannot support pflash.
> 
> Can SNP support the behavior of pflash? That what's got changed will be
> synced back to OVMF file?

For split OVMF files (separate FD for CODE vs. VARS) it can, but it
requires special handling in OVMF to handle MMIO to the VARS area using
direct MMIO hypercalls rather than relying on MMIO emulation. Here's the
relevant OVMF commit:

  commit 437eb3f7a8db7681afe0e6064d3a8edb12abb766
  Author: Tom Lendacky <thomas.lendacky@amd.com>
  Date:   Wed Aug 12 15:21:42 2020 -0500

      OvmfPkg/QemuFlashFvbServicesRuntimeDxe: Bypass flash detection with SEV-ES

      BZ: https://bugzilla.tianocore.org/show_bug.cgi?id=2198

For SNP, the plan is to continue to use -bios to handle the actual
CODE/BIOS FD, but to allow the use of pflash,unit=0 to handle the VARS
fd if a separate/persistent store is desired. This allows us to continue
using read-only memslots on the VARS/pflash side without being at odds
with the fact that read-only memslots are no longer supported for private
memslots (since VARS doesn't need to be measured/mapped as private), and
limiting the special handling to -bios where TDX/SNP both need private
memslots.

This is roughly how things will look with v4 of this series:

  https://github.com/AMDESE/qemu/commit/21fff075372ad25b2d09c5e416349c2b353fdb4c

I think (if needed) TDX could in theory take a similar approach with
similar modifications to OVMF and providing an option for a split CODE/VARS
variant.

-Mike

> 
> > Xiaoyao can chime in.
> > 
> > Thanks,
> > 
> > > 
> > > Current SNP guest kernels will attempt to access these regions with
> > > with C-bit set, so guest_memfd is needed to handle that. Otherwise,
> > > kvm_convert_memory() will fail when the guest kernel tries to access it
> > > and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges
> > > to private.
> > > 
> > > Whether guests should actually try to access ROM regions in this way (or
> > > need to deal with legacy ROM regions at all), is a separate issue to be
> > > addressed on kernel side, but current SNP guest kernels will exhibit
> > > this behavior and so this handling is needed to allow QEMU to continue
> > > running existing SNP guest kernels.
> > > 
> > > Signed-off-by: Michael Roth <michael.roth@amd.com>
> > > ---
> > >   hw/i386/pc.c       | 13 +++++++++----
> > >   hw/i386/pc_sysfw.c | 13 ++++++++++---
> > >   2 files changed, 19 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > index feb7a93083..5feaeb43ee 100644
> > > --- a/hw/i386/pc.c
> > > +++ b/hw/i386/pc.c
> > > @@ -1011,10 +1011,15 @@ void pc_memory_init(PCMachineState *pcms,
> > >       pc_system_firmware_init(pcms, rom_memory);
> > >       option_rom_mr = g_malloc(sizeof(*option_rom_mr));
> > > -    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
> > > -                           &error_fatal);
> > > -    if (pcmc->pci_enabled) {
> > > -        memory_region_set_readonly(option_rom_mr, true);
> > > +    if (machine_require_guest_memfd(machine)) {
> > > +        memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
> > > +                                           PC_ROM_SIZE, &error_fatal);
> > > +    } else {
> > > +        memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
> > > +                               &error_fatal);
> > > +        if (pcmc->pci_enabled) {
> > > +            memory_region_set_readonly(option_rom_mr, true);
> > > +        }
> > >       }
> > >       memory_region_add_subregion_overlap(rom_memory,
> > >                                           PC_ROM_MIN_VGA,
> > > diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
> > > index 9dbb3f7337..850f86edd4 100644
> > > --- a/hw/i386/pc_sysfw.c
> > > +++ b/hw/i386/pc_sysfw.c
> > > @@ -54,8 +54,13 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
> > >       /* map the last 128KB of the BIOS in ISA space */
> > >       isa_bios_size = MIN(flash_size, 128 * KiB);
> > >       isa_bios = g_malloc(sizeof(*isa_bios));
> > > -    memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
> > > -                           &error_fatal);
> > > +    if (machine_require_guest_memfd(current_machine)) {
> > > +        memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios",
> > > +                                           isa_bios_size, &error_fatal);
> > > +    } else {
> > > +        memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
> > > +                               &error_fatal);
> > > +    }
> > >       memory_region_add_subregion_overlap(rom_memory,
> > >                                           0x100000 - isa_bios_size,
> > >                                           isa_bios,
> > > @@ -68,7 +73,9 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
> > >              ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size),
> > >              isa_bios_size);
> > > -    memory_region_set_readonly(isa_bios, true);
> > > +    if (!machine_require_guest_memfd(current_machine)) {
> > > +        memory_region_set_readonly(isa_bios, true);
> > > +    }
> > >   }
> > >   static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms,
> > > -- 
> > > 2.25.1
> > > 
> > > 
> > 
> 
>
diff mbox series

Patch

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index feb7a93083..5feaeb43ee 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1011,10 +1011,15 @@  void pc_memory_init(PCMachineState *pcms,
     pc_system_firmware_init(pcms, rom_memory);
 
     option_rom_mr = g_malloc(sizeof(*option_rom_mr));
-    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
-                           &error_fatal);
-    if (pcmc->pci_enabled) {
-        memory_region_set_readonly(option_rom_mr, true);
+    if (machine_require_guest_memfd(machine)) {
+        memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
+                                           PC_ROM_SIZE, &error_fatal);
+    } else {
+        memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
+                               &error_fatal);
+        if (pcmc->pci_enabled) {
+            memory_region_set_readonly(option_rom_mr, true);
+        }
     }
     memory_region_add_subregion_overlap(rom_memory,
                                         PC_ROM_MIN_VGA,
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 9dbb3f7337..850f86edd4 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -54,8 +54,13 @@  static void pc_isa_bios_init(MemoryRegion *rom_memory,
     /* map the last 128KB of the BIOS in ISA space */
     isa_bios_size = MIN(flash_size, 128 * KiB);
     isa_bios = g_malloc(sizeof(*isa_bios));
-    memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
-                           &error_fatal);
+    if (machine_require_guest_memfd(current_machine)) {
+        memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios",
+                                           isa_bios_size, &error_fatal);
+    } else {
+        memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
+                               &error_fatal);
+    }
     memory_region_add_subregion_overlap(rom_memory,
                                         0x100000 - isa_bios_size,
                                         isa_bios,
@@ -68,7 +73,9 @@  static void pc_isa_bios_init(MemoryRegion *rom_memory,
            ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size),
            isa_bios_size);
 
-    memory_region_set_readonly(isa_bios, true);
+    if (!machine_require_guest_memfd(current_machine)) {
+        memory_region_set_readonly(isa_bios, true);
+    }
 }
 
 static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms,