[PATCH v4 27/31] hw/i386/sev: Use guest_memfd for legacy ROMs

Pankaj Gupta posted 31 patches 5 months, 4 weeks ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, Marcelo Tosatti <mtosatti@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, "Michael S. Tsirkin" <mst@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Peter Xu <peterx@redhat.com>, David Hildenbrand <david@redhat.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Cornelia Huck <cohuck@redhat.com>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>
There is a newer version of this series
[PATCH v4 27/31] hw/i386/sev: Use guest_memfd for legacy ROMs
Posted by Pankaj Gupta 5 months, 4 weeks ago
From: Michael Roth <michael.roth@amd.com>

Current SNP guest kernels will attempt to access these regions with
with C-bit set, so guest_memfd is needed to handle that. Otherwise,
kvm_convert_memory() will fail when the guest kernel tries to access it
and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges
to private.

Whether guests should actually try to access ROM regions in this way (or
need to deal with legacy ROM regions at all), is a separate issue to be
addressed on kernel side, but current SNP guest kernels will exhibit
this behavior and so this handling is needed to allow QEMU to continue
running existing SNP guest kernels.

Signed-off-by: Michael Roth <michael.roth@amd.com>
[pankaj: Added sev_snp_enabled() check]
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
---
 hw/i386/pc.c       | 14 ++++++++++----
 hw/i386/pc_sysfw.c | 13 ++++++++++---
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7b638da7aa..62c25ea1e9 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -62,6 +62,7 @@
 #include "hw/mem/memory-device.h"
 #include "e820_memory_layout.h"
 #include "trace.h"
+#include "sev.h"
 #include CONFIG_DEVICES
 
 #ifdef CONFIG_XEN_EMU
@@ -1022,10 +1023,15 @@ void pc_memory_init(PCMachineState *pcms,
     pc_system_firmware_init(pcms, rom_memory);
 
     option_rom_mr = g_malloc(sizeof(*option_rom_mr));
-    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
-                           &error_fatal);
-    if (pcmc->pci_enabled) {
-        memory_region_set_readonly(option_rom_mr, true);
+    if (sev_snp_enabled()) {
+        memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
+                                           PC_ROM_SIZE, &error_fatal);
+    } else {
+        memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
+                               &error_fatal);
+        if (pcmc->pci_enabled) {
+            memory_region_set_readonly(option_rom_mr, true);
+        }
     }
     memory_region_add_subregion_overlap(rom_memory,
                                         PC_ROM_MIN_VGA,
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 00464afcb4..def77a442d 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -51,8 +51,13 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
 
     /* map the last 128KB of the BIOS in ISA space */
     isa_bios_size = MIN(flash_size, 128 * KiB);
-    memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
-                           &error_fatal);
+    if (sev_snp_enabled()) {
+        memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios",
+                                           isa_bios_size, &error_fatal);
+    } else {
+        memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
+                               &error_fatal);
+    }
     memory_region_add_subregion_overlap(rom_memory,
                                         0x100000 - isa_bios_size,
                                         isa_bios,
@@ -65,7 +70,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
            ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size),
            isa_bios_size);
 
-    memory_region_set_readonly(isa_bios, true);
+    if (!machine_require_guest_memfd(current_machine)) {
+        memory_region_set_readonly(isa_bios, true);
+    }
 }
 
 static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms,
-- 
2.34.1
Re: [PATCH v4 27/31] hw/i386/sev: Use guest_memfd for legacy ROMs
Posted by Xiaoyao Li 5 months, 1 week ago
On 5/30/2024 7:16 PM, Pankaj Gupta wrote:
> From: Michael Roth <michael.roth@amd.com>
> 
> Current SNP guest kernels will attempt to access these regions with
> with C-bit set, so guest_memfd is needed to handle that. Otherwise,
> kvm_convert_memory() will fail when the guest kernel tries to access it
> and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges
> to private.
> 
> Whether guests should actually try to access ROM regions in this way (or
> need to deal with legacy ROM regions at all), is a separate issue to be
> addressed on kernel side, but current SNP guest kernels will exhibit
> this behavior and so this handling is needed to allow QEMU to continue
> running existing SNP guest kernels.
> 
> Signed-off-by: Michael Roth <michael.roth@amd.com>
> [pankaj: Added sev_snp_enabled() check]
> Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
> ---
>   hw/i386/pc.c       | 14 ++++++++++----
>   hw/i386/pc_sysfw.c | 13 ++++++++++---
>   2 files changed, 20 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 7b638da7aa..62c25ea1e9 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -62,6 +62,7 @@
>   #include "hw/mem/memory-device.h"
>   #include "e820_memory_layout.h"
>   #include "trace.h"
> +#include "sev.h"
>   #include CONFIG_DEVICES
>   
>   #ifdef CONFIG_XEN_EMU
> @@ -1022,10 +1023,15 @@ void pc_memory_init(PCMachineState *pcms,
>       pc_system_firmware_init(pcms, rom_memory);
>   
>       option_rom_mr = g_malloc(sizeof(*option_rom_mr));
> -    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
> -                           &error_fatal);
> -    if (pcmc->pci_enabled) {
> -        memory_region_set_readonly(option_rom_mr, true);
> +    if (sev_snp_enabled()) {
> +        memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
> +                                           PC_ROM_SIZE, &error_fatal);
> +    } else {
> +        memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
> +                               &error_fatal);
> +        if (pcmc->pci_enabled) {
> +            memory_region_set_readonly(option_rom_mr, true);
> +        }
>       }
>       memory_region_add_subregion_overlap(rom_memory,
>                                           PC_ROM_MIN_VGA,
> diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
> index 00464afcb4..def77a442d 100644
> --- a/hw/i386/pc_sysfw.c
> +++ b/hw/i386/pc_sysfw.c
> @@ -51,8 +51,13 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
>   
>       /* map the last 128KB of the BIOS in ISA space */
>       isa_bios_size = MIN(flash_size, 128 * KiB);
> -    memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
> -                           &error_fatal);
> +    if (sev_snp_enabled()) {
> +        memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios",
> +                                           isa_bios_size, &error_fatal);
> +    } else {
> +        memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
> +                               &error_fatal);
> +    }
>       memory_region_add_subregion_overlap(rom_memory,
>                                           0x100000 - isa_bios_size,
>                                           isa_bios,
> @@ -65,7 +70,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
>              ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size),
>              isa_bios_size);
>   
> -    memory_region_set_readonly(isa_bios, true);
> +    if (!machine_require_guest_memfd(current_machine)) {
> +        memory_region_set_readonly(isa_bios, true);
> +    }

This patch takes different approach than next patch that this patch 
chooses to not set readonly when require guest memfd while next patch 
skips the whole isa_bios setup for -bios case. Why make different 
handling for the two cases?

More importantly, with commit a44ea3fa7f2a,
pcmc->isa_bios_alias is default true for all new machine after 9.0, then 
pc_isa_bios_init() would be hit even on plash case. It will call 
x86_isa_bios_init() in pc_system_flash_map().

So with -bios case, the call site is

   pc_system_firmware_init()
      -> x86_bios_rom_init()
	-> x86_isa_bios_init()

   because require_guest_memfd is true for snp, x86_isa_bios_init() is 
not called.

However, with pflash case, the call site is

   pc_system_firmware_init()
      -> pc_system_flash_map()
	 -> if (pcmc->isa_bios_alias) {
                 x86_isa_bios_init();
             } else {
                 pc_isa_bios_init();
             }

As I said above, pcmc->isa_bios_alias is true for machine after 9.0, so 
it will goes x86_isa_bios_init().

So please anyone explain to me why x86_isa_bios_init() is ok for pflash 
case but not for -bios case?

>   }
>   
>   static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms,
Re: [PATCH v4 27/31] hw/i386/sev: Use guest_memfd for legacy ROMs
Posted by Gupta, Pankaj 5 months, 1 week ago
On 6/14/2024 10:58 AM, Xiaoyao Li wrote:
> On 5/30/2024 7:16 PM, Pankaj Gupta wrote:
>> From: Michael Roth <michael.roth@amd.com>
>>
>> Current SNP guest kernels will attempt to access these regions with
>> with C-bit set, so guest_memfd is needed to handle that. Otherwise,
>> kvm_convert_memory() will fail when the guest kernel tries to access it
>> and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges
>> to private.
>>
>> Whether guests should actually try to access ROM regions in this way (or
>> need to deal with legacy ROM regions at all), is a separate issue to be
>> addressed on kernel side, but current SNP guest kernels will exhibit
>> this behavior and so this handling is needed to allow QEMU to continue
>> running existing SNP guest kernels.
>>
>> Signed-off-by: Michael Roth <michael.roth@amd.com>
>> [pankaj: Added sev_snp_enabled() check]
>> Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
>> ---
>>   hw/i386/pc.c       | 14 ++++++++++----
>>   hw/i386/pc_sysfw.c | 13 ++++++++++---
>>   2 files changed, 20 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index 7b638da7aa..62c25ea1e9 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -62,6 +62,7 @@
>>   #include "hw/mem/memory-device.h"
>>   #include "e820_memory_layout.h"
>>   #include "trace.h"
>> +#include "sev.h"
>>   #include CONFIG_DEVICES
>>   #ifdef CONFIG_XEN_EMU
>> @@ -1022,10 +1023,15 @@ void pc_memory_init(PCMachineState *pcms,
>>       pc_system_firmware_init(pcms, rom_memory);
>>       option_rom_mr = g_malloc(sizeof(*option_rom_mr));
>> -    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
>> -                           &error_fatal);
>> -    if (pcmc->pci_enabled) {
>> -        memory_region_set_readonly(option_rom_mr, true);
>> +    if (sev_snp_enabled()) {
>> +        memory_region_init_ram_guest_memfd(option_rom_mr, NULL, 
>> "pc.rom",
>> +                                           PC_ROM_SIZE, &error_fatal);
>> +    } else {
>> +        memory_region_init_ram(option_rom_mr, NULL, "pc.rom", 
>> PC_ROM_SIZE,
>> +                               &error_fatal);
>> +        if (pcmc->pci_enabled) {
>> +            memory_region_set_readonly(option_rom_mr, true);
>> +        }
>>       }
>>       memory_region_add_subregion_overlap(rom_memory,
>>                                           PC_ROM_MIN_VGA,
>> diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
>> index 00464afcb4..def77a442d 100644
>> --- a/hw/i386/pc_sysfw.c
>> +++ b/hw/i386/pc_sysfw.c
>> @@ -51,8 +51,13 @@ static void pc_isa_bios_init(MemoryRegion 
>> *isa_bios, MemoryRegion *rom_memory,
>>       /* map the last 128KB of the BIOS in ISA space */
>>       isa_bios_size = MIN(flash_size, 128 * KiB);
>> -    memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
>> -                           &error_fatal);
>> +    if (sev_snp_enabled()) {
>> +        memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios",
>> +                                           isa_bios_size, &error_fatal);
>> +    } else {
>> +        memory_region_init_ram(isa_bios, NULL, "isa-bios", 
>> isa_bios_size,
>> +                               &error_fatal);
>> +    }
>>       memory_region_add_subregion_overlap(rom_memory,
>>                                           0x100000 - isa_bios_size,
>>                                           isa_bios,
>> @@ -65,7 +70,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, 
>> MemoryRegion *rom_memory,
>>              ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size),
>>              isa_bios_size);
>> -    memory_region_set_readonly(isa_bios, true);
>> +    if (!machine_require_guest_memfd(current_machine)) {
>> +        memory_region_set_readonly(isa_bios, true);
>> +    }
> 
> This patch takes different approach than next patch that this patch 
> chooses to not set readonly when require guest memfd while next patch 
> skips the whole isa_bios setup for -bios case. Why make different 
> handling for the two cases?
> 
> More importantly, with commit a44ea3fa7f2a,
> pcmc->isa_bios_alias is default true for all new machine after 9.0, then 
> pc_isa_bios_init() would be hit even on plash case. It will call 
> x86_isa_bios_init() in pc_system_flash_map().
> 
> So with -bios case, the call site is
> 
>    pc_system_firmware_init()
>       -> x86_bios_rom_init()
>      -> x86_isa_bios_init()
> 
>    because require_guest_memfd is true for snp, x86_isa_bios_init() is 
> not called.
> 
> However, with pflash case, the call site is

btw, right now we don't support 'pflash' case with SNP. Please see the 
discussion [1]. So, this seems to be pre-enablement for 'bios' with 
'pflash' case, which we proposed in PATCH 29 and could not make it to
Upstream (reasons mentioned in the thread). But this does not impact the 
other functionality.

Thanks,
Pankaj

[1] 
https://lore.kernel.org/all/20240530111643.1091816-30-pankaj.gupta@amd.com/

> 
>    pc_system_firmware_init()
>       -> pc_system_flash_map()
>       -> if (pcmc->isa_bios_alias) {
>                  x86_isa_bios_init();
>              } else {
>                  pc_isa_bios_init();
>              }
> 
> As I said above, pcmc->isa_bios_alias is true for machine after 9.0, so 
> it will goes x86_isa_bios_init().
> 
> So please anyone explain to me why x86_isa_bios_init() is ok for pflash 
> case but not for -bios case?
> 
>>   }
>>   static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms,
> 


Re: [PATCH v4 27/31] hw/i386/sev: Use guest_memfd for legacy ROMs
Posted by Paolo Bonzini 5 months, 3 weeks ago
On Thu, May 30, 2024 at 1:17 PM Pankaj Gupta <pankaj.gupta@amd.com> wrote:
>
> From: Michael Roth <michael.roth@amd.com>
>
> Current SNP guest kernels will attempt to access these regions with
> with C-bit set, so guest_memfd is needed to handle that. Otherwise,
> kvm_convert_memory() will fail when the guest kernel tries to access it
> and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges
> to private.
>
> Whether guests should actually try to access ROM regions in this way (or
> need to deal with legacy ROM regions at all), is a separate issue to be
> addressed on kernel side, but current SNP guest kernels will exhibit
> this behavior and so this handling is needed to allow QEMU to continue
> running existing SNP guest kernels.

[...]

>  #ifdef CONFIG_XEN_EMU
> @@ -1022,10 +1023,15 @@ void pc_memory_init(PCMachineState *pcms,
>      pc_system_firmware_init(pcms, rom_memory);
>
>      option_rom_mr = g_malloc(sizeof(*option_rom_mr));
> -    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
> -                           &error_fatal);
> -    if (pcmc->pci_enabled) {
> -        memory_region_set_readonly(option_rom_mr, true);
> +    if (sev_snp_enabled()) {

Using sev_snp_enabled() here however is pretty ugly...

Fortunately we can fix machine_require_guest_memfd(), which I think is
initialized later (?), so that it is usable here too (and the code is
cleaner). To do so, just delegate machine_require_guest_memfd() to the
ConfidentialGuestSupport object (see patch at
https://patchew.org/QEMU/20240531112636.80097-1-pbonzini@redhat.com/)
and then initialize the new field in SevSnpGuest's instance_init
function:

diff --git a/target/i386/sev.c b/target/i386/sev.c
index 1c5e2e7a1f9..a7574d1c707 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -2328,8 +2328,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
 static void
 sev_snp_guest_instance_init(Object *obj)
 {
+    ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
     SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);

+    cgs->require_guest_memfd = true;
+
     /* default init/start/finish params for kvm */
     sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY;
 }


Paolo