RFC: https://bugzilla.tianocore.org/show_bug.cgi?id=3429
In TDX BSP and APs goes to the same entry point in SecEntry.nasm.
BSP initialize the temporary stack and then jumps to SecMain, just as
legacy Ovmf does.
APs spin in a modified mailbox loop using initial mailbox structure.
Its structure defition is in OvmfPkg/Include/IndustryStandard/IntelTdx.h.
APs wait for command to see if the command is for me. If so execute the
command.
There are 2 commands are supported:
- WakeUp:
BSP issues this command to move APs to final OS spinloop and Mailbox
in reserved memory.
- AcceptPages:
To mitigate the performance impact of accepting pages in SEC phase on
BSP, BSP will parse memory resources and assign each AP the task of
accepting a subset of pages. This command may be called several times
until all memory resources are processed. In accepting pages, PageLevel
may fall back to smaller one if SIZE_MISMATCH error is returned.
TdxCommondefs.inc is added which includes the common definitions used by
the APs in SecEntry.nasm.
Cc: Ard Biesheuvel <ardb+tianocore@kernel.org>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Erdem Aktas <erdemaktas@google.com>
Cc: James Bottomley <jejb@linux.ibm.com>
Cc: Jiewen Yao <jiewen.yao@intel.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Min Xu <min.m.xu@intel.com>
---
OvmfPkg/Include/TdxCommondefs.inc | 51 +++++
OvmfPkg/Sec/SecMain.inf | 1 +
OvmfPkg/Sec/X64/SecEntry.nasm | 314 ++++++++++++++++++++++++++++++
3 files changed, 366 insertions(+)
create mode 100644 OvmfPkg/Include/TdxCommondefs.inc
diff --git a/OvmfPkg/Include/TdxCommondefs.inc b/OvmfPkg/Include/TdxCommondefs.inc
new file mode 100644
index 000000000000..970eac96592a
--- /dev/null
+++ b/OvmfPkg/Include/TdxCommondefs.inc
@@ -0,0 +1,51 @@
+;------------------------------------------------------------------------------
+; @file
+; TDX Common defitions used by the APs in mailbox
+;
+; Copyright (c) 2021, Intel Corporation. All rights reserved.<BR>
+; SPDX-License-Identifier: BSD-2-Clause-Patent
+;
+;------------------------------------------------------------------------------
+
+CommandOffset equ 00h
+ApicidOffset equ 04h
+WakeupVectorOffset equ 08h
+OSArgsOffset equ 10h
+FirmwareArgsOffset equ 800h
+WakeupArgsRelocatedMailBox equ 800h
+AcceptPageArgsPhysicalStart equ 800h
+AcceptPageArgsPhysicalEnd equ 808h
+AcceptPageArgsChunkSize equ 810h
+AcceptPageArgsPageSize equ 818h
+CpuArrivalOffset equ 900h
+CpusExitingOffset equ 0a00h
+TalliesOffset equ 0a08h
+ErrorsOffset equ 0e08h
+
+SIZE_4KB equ 1000h
+SIZE_2MB equ 200000h
+SIZE_1GB equ 40000000h
+
+PAGE_ACCEPT_LEVEL_4K equ 0
+PAGE_ACCEPT_LEVEL_2M equ 1
+PAGE_ACCEPT_LEVEL_1G equ 2
+
+TDX_PAGE_ALREADY_ACCEPTED equ 0x00000b0a
+TDX_PAGE_SIZE_MISMATCH equ 0xc0000b0b
+
+; Errors of APs in Mailbox
+ERROR_NON equ 0
+ERROR_INVALID_ACCEPT_PAGE_SIZE equ 1
+ERROR_ACCEPT_PAGE_ERROR equ 2
+ERROR_INVALID_FALLBACK_PAGE_LEVEL equ 3
+
+MpProtectedModeWakeupCommandNoop equ 0
+MpProtectedModeWakeupCommandWakeup equ 1
+MpProtectedModeWakeupCommandSleep equ 2
+MpProtectedModeWakeupCommandAcceptPages equ 3
+
+MailboxApicIdInvalid equ 0xffffffff
+MailboxApicidBroadcast equ 0xfffffffe
+
+%define TDCALL_TDINFO 0x1
+%define TDCALL_TDACCEPTPAGE 0x6
diff --git a/OvmfPkg/Sec/SecMain.inf b/OvmfPkg/Sec/SecMain.inf
index ea4b9611f52d..6083fa21a433 100644
--- a/OvmfPkg/Sec/SecMain.inf
+++ b/OvmfPkg/Sec/SecMain.inf
@@ -72,6 +72,7 @@
gEfiMdeModulePkgTokenSpaceGuid.PcdInitValueInTempStack
gUefiOvmfPkgTokenSpaceGuid.PcdOvmfConfidentialComputingWorkAreaHeader
gUefiOvmfPkgTokenSpaceGuid.PcdOvmfWorkAreaBase
+ gUefiOvmfPkgTokenSpaceGuid.PcdOvmfSecGhcbBackupBase
[FeaturePcd]
gUefiOvmfPkgTokenSpaceGuid.PcdSmmSmramRequire
diff --git a/OvmfPkg/Sec/X64/SecEntry.nasm b/OvmfPkg/Sec/X64/SecEntry.nasm
index 1cc680a70716..d0833db68410 100644
--- a/OvmfPkg/Sec/X64/SecEntry.nasm
+++ b/OvmfPkg/Sec/X64/SecEntry.nasm
@@ -10,12 +10,17 @@
;------------------------------------------------------------------------------
#include <Base.h>
+%include "TdxCommondefs.inc"
DEFAULT REL
SECTION .text
extern ASM_PFX(SecCoreStartupWithStack)
+%macro tdcall 0
+ db 0x66, 0x0f, 0x01, 0xcc
+%endmacro
+
;
; SecCore Entry Point
;
@@ -35,6 +40,32 @@ extern ASM_PFX(SecCoreStartupWithStack)
global ASM_PFX(_ModuleEntryPoint)
ASM_PFX(_ModuleEntryPoint):
+ ;
+ ; Guest type is stored in OVMF_WORK_AREA
+ ;
+ %define OVMF_WORK_AREA FixedPcdGet32 (PcdOvmfWorkAreaBase)
+ %define VM_GUEST_TYPE_TDX 2
+ mov eax, OVMF_WORK_AREA
+ cmp byte[eax], VM_GUEST_TYPE_TDX
+ jne InitStack
+
+ mov rax, TDCALL_TDINFO
+ tdcall
+
+ ;
+ ; R8 [31:0] NUM_VCPUS
+ ; [63:32] MAX_VCPUS
+ ; R9 [31:0] VCPU_INDEX
+ ; Td Guest set the VCPU0 as the BSP, others are the APs
+ ; APs jump to spinloop and get released by DXE's MpInitLib
+ ;
+ mov rax, r9
+ and rax, 0xffff
+ test rax, rax
+ jne ParkAp
+
+InitStack:
+
;
; Fill the temporary RAM with the initial stack value.
; The loop below will seed the heap as well, but that's harmless.
@@ -67,3 +98,286 @@ ASM_PFX(_ModuleEntryPoint):
sub rsp, 0x20
call ASM_PFX(SecCoreStartupWithStack)
+ ;
+ ; Note: BSP never gets here. APs will be unblocked by DXE
+ ;
+ ; R8 [31:0] NUM_VCPUS
+ ; [63:32] MAX_VCPUS
+ ; R9 [31:0] VCPU_INDEX
+ ;
+ParkAp:
+
+ mov rbp, r9
+
+.do_wait_loop:
+ mov rsp, FixedPcdGet32 (PcdOvmfSecGhcbBackupBase)
+
+ ;
+ ; register itself in [rsp + CpuArrivalOffset]
+ ;
+ mov rax, 1
+ lock xadd dword [rsp + CpuArrivalOffset], eax
+ inc eax
+
+.check_arrival_cnt:
+ cmp eax, r8d
+ je .check_command
+ mov eax, dword[rsp + CpuArrivalOffset]
+ jmp .check_arrival_cnt
+
+.check_command:
+ mov eax, dword[rsp + CommandOffset]
+ cmp eax, MpProtectedModeWakeupCommandNoop
+ je .check_command
+
+ cmp eax, MpProtectedModeWakeupCommandWakeup
+ je .do_wakeup
+
+ cmp eax, MpProtectedModeWakeupCommandAcceptPages
+ jne .check_command
+
+ ;
+ ; AP Accept Pages
+ ;
+ ; Accept Pages in TDX is time-consuming, especially for big memory.
+ ; One of the mitigation is to accept pages by BSP and APs parallely.
+ ;
+ ; For example, there are 4 CPUs (1 BSP and 3 APs). Totally there are
+ ; 1G memory to be accepted.
+ ;
+ ; BSP is responsible for the memory regions of:
+ ; Start : StartAddress + ChunkSize * (4) * Index
+ ; Length: ChunkSize
+ ; APs is reponsible for the memory regions of:
+ ; Start : StartAddress + ChunkSize * (4) * Index + ChunkSize * CpuId
+ ; Length: ChunkSize
+ ;
+ ; TDCALL_TDACCEPTPAGE supports the PageSize of 4K and 2M. Sometimes when
+ ; the PageSize is 2M, TDX_PAGE_SIZE_MISMATCH is returned as the error code.
+ ; In this case, TDVF need fall back to 4k PageSize to accept again.
+ ;
+ ; If any errors happened in accept pages, an error code is recorded in
+ ; Mailbox [ErrorsOffset + CpuIndex]
+ ;
+.ap_accept_page:
+
+ ;
+ ; Clear the errors and fallback flag
+ ;
+ mov al, ERROR_NON
+ mov byte[rsp + ErrorsOffset + rbp], al
+ xor r12, r12
+
+ ;
+ ; Get PhysicalAddress/ChunkSize/PageSize
+ ;
+ mov rcx, [rsp + AcceptPageArgsPhysicalStart]
+ mov rbx, [rsp + AcceptPageArgsChunkSize]
+
+ ;
+ ; Set AcceptPageLevel based on the AcceptPagesize
+ ; Currently only 2M/4K page size is acceptable
+ ;
+ mov r15, [rsp + AcceptPageArgsPageSize]
+ cmp r15, SIZE_4KB
+ je .set_4kb
+ cmp r15, SIZE_2MB
+ je .set_2mb
+
+ mov al, ERROR_INVALID_ACCEPT_PAGE_SIZE
+ mov byte[rsp + ErrorsOffset + rbp], al
+ jmp .do_finish_command
+
+.set_4kb:
+ mov r15, PAGE_ACCEPT_LEVEL_4K
+ jmp .physical_address
+
+.set_2mb:
+ mov r15, PAGE_ACCEPT_LEVEL_2M
+
+.physical_address:
+ ;
+ ; PhysicalAddress += (CpuId * ChunkSize)
+ ;
+ xor rdx, rdx
+ mov eax, ebp
+ mul ebx
+ add rcx, rax
+ shl rdx, 32
+ add rcx, rdx
+
+.do_accept_next_range:
+ ;
+ ; Make sure we don't accept page beyond ending page
+ ; This could happen is ChunkSize crosses the end of region
+ ;
+ cmp rcx, [rsp + AcceptPageArgsPhysicalEnd ]
+ jge .do_finish_command
+
+ ;
+ ; Save starting address for this region
+ ;
+ mov r11, rcx
+
+ ;
+ ; Size = MIN(ChunkSize, PhysicalEnd - PhysicalAddress);
+ ;
+ mov rax, [rsp + AcceptPageArgsPhysicalEnd]
+ sub rax, rcx
+ cmp rax, rbx
+ jge .do_accept_loop
+ mov rbx, rax
+
+.do_accept_loop:
+ ;
+ ; RCX: Accept address
+ ; R15: Accept Page Level
+ ; R12: Flag of fall back accept
+ ;
+ mov rax, TDCALL_TDACCEPTPAGE
+ xor rdx, rdx
+ or rcx, r15
+
+ tdcall
+
+ ;
+ ; Check status code in RAX
+ ;
+ test rax, rax
+ jz .accept_success
+
+ shr rax, 32
+ cmp eax, TDX_PAGE_ALREADY_ACCEPTED
+ jz .already_accepted
+
+ cmp eax, TDX_PAGE_SIZE_MISMATCH
+ jz .accept_size_mismatch
+
+ ;
+ ; other error
+ ;
+ mov al, ERROR_ACCEPT_PAGE_ERROR
+ mov byte[rsp + ErrorsOffset + rbp], al
+ jmp .do_finish_command
+
+.accept_size_mismatch:
+ ;
+ ; Check the current PageLevel.
+ ; ACCEPT_LEVEL_4K is the least level and cannot fall back any more.
+ ; If in this case, just record the error and return
+ ;
+ cmp r15, PAGE_ACCEPT_LEVEL_4K
+ jne .do_fallback_accept
+ mov al, ERROR_INVALID_FALLBACK_PAGE_LEVEL
+ mov byte[rsp + ErrorsOffset + rbp], al
+ jmp .do_finish_command
+
+.do_fallback_accept:
+ ;
+ ; In fall back accept, just loop 512 times (2M = 512 * 4K)
+ ; Save the rcx in r13.
+ ; Decrease the PageLevel in R15.
+ ; R12 indicates it is in a fall back accept loop.
+ ;
+ mov r14, 512
+ and rcx, ~0x3ULL
+ mov r13, rcx
+ xor rdx, rdx
+ dec r15
+ mov r12, 1
+
+ jmp .do_accept_loop
+
+.accept_success:
+ ;
+ ; Keep track of how many accepts per cpu
+ ;
+ inc dword[rsp + TalliesOffset + rbp * 4]
+
+ ;
+ ; R12 indicate whether it is a fall back accept
+ ; If it is a success of fall back accept
+ ; Just loop 512 times to .do_accept_loop
+ ;
+ test r12, r12
+ jz .normal_accept_success
+
+ ;
+ ; This is fallback accept success
+ ;
+ add rcx, SIZE_4KB
+ dec r14
+ test r14, r14
+ jz .fallback_accept_done
+ jmp .do_accept_loop
+
+.fallback_accept_done:
+ ;
+ ; Fall back accept done.
+ ; Restore the start address to RCX from R13
+ ; Clear the fall back accept flag
+ ;
+ mov rcx, r13
+ inc r15
+ xor r12, r12
+
+.already_accepted:
+ ;
+ ; Handle the sitution of fall back accpet
+ ;
+ test r12, r12
+ jnz .accept_success
+
+.normal_accept_success:
+ ;
+ ; Reduce accept size by a PageSize, and increment address
+ ;
+ mov r12, [rsp + AcceptPageArgsPageSize]
+ sub rbx, r12
+ add rcx, r12
+ xor r12, r12
+
+ ;
+ ; We may be given multiple pages to accept, make sure we
+ ; aren't done
+ ;
+ test rbx, rbx
+ jne .do_accept_loop
+
+ ;
+ ; Restore address before, and then increment by stride (num-cpus * ChunkSize)
+ ;
+ xor rdx, rdx
+ mov rcx, r11
+ mov eax, r8d
+ mov ebx, [rsp + AcceptPageArgsChunkSize]
+ mul ebx
+ add rcx, rax
+ shl rdx, 32
+ add rcx, rdx
+ jmp .do_accept_next_range
+
+.do_finish_command:
+ mov eax, 0FFFFFFFFh
+ lock xadd dword [rsp + CpusExitingOffset], eax
+ dec eax
+
+.check_exiting_cnt:
+ cmp eax, 0
+ je .do_wait_loop
+ mov eax, dword[rsp + CpusExitingOffset]
+ jmp .check_exiting_cnt
+
+.do_wakeup:
+ ;
+ ; BSP sets these variables before unblocking APs
+ ; RAX: WakeupVectorOffset
+ ; RBX: Relocated mailbox address
+ ; RBP: vCpuId
+ ;
+ mov rax, 0
+ mov eax, dword[rsp + WakeupVectorOffset]
+ mov rbx, [rsp + WakeupArgsRelocatedMailBox]
+ nop
+ jmp rax
+ jmp $
--
2.29.2.windows.2
-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#82970): https://edk2.groups.io/g/devel/message/82970
Mute This Topic: https://groups.io/mt/86739864/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-
Hi, > - AcceptPages: > To mitigate the performance impact of accepting pages in SEC phase on > BSP, BSP will parse memory resources and assign each AP the task of > accepting a subset of pages. This command may be called several times > until all memory resources are processed. In accepting pages, PageLevel > may fall back to smaller one if SIZE_MISMATCH error is returned. Why add an assembler version of this? There already is a C version (in TdxLib, patch #2). When adding lazy accept at some point in the future we will stop accepting all pages in the SEC phase anyway. There is Mp support (patch #14) so you can distribute the load to all CPUs in PEI / DXE phase if you want (although the benefits of parallel accept will be much smaller once lazy accept is there). take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#83221): https://edk2.groups.io/g/devel/message/83221 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On November 3, 2021 2:31 PM, Gerd Hoffmann wrote: > Hi, > > > - AcceptPages: > > To mitigate the performance impact of accepting pages in SEC phase on > > BSP, BSP will parse memory resources and assign each AP the task of > > accepting a subset of pages. This command may be called several times > > until all memory resources are processed. In accepting pages, PageLevel > > may fall back to smaller one if SIZE_MISMATCH error is returned. > > Why add an assembler version of this? There already is a C version (in TdxLib, > patch #2). When adding lazy accept at some point in the future we will stop > accepting all pages in the SEC phase anyway. There is Mp support (patch #14) > so you can distribute the load to all CPUs in PEI / DXE phase if you want > (although the benefits of parallel accept will be much smaller once lazy > accept is there). There are below considerations about accept pages in SEC phase. 1. There is a minimal memory requirement in DxeCore [1]. In legacy Ovmf the memory is initialized in PEI phase. But TDVF has 2 configurations (Config-A and Config-B) [2]. PEI phase is skipped in Config-B. So we have to accept memories in SEC phase. This is to make the code consistent in Config-A and Config-B. 2. TDVF's MP support is via Mailbox [3] . BSP wakes up APs by putting command/parameters in Mailbox. So we have this patch [4]. While EDK2's MP support (CpuMpPei/CpuDxe) is via INIT-SIPI-SIPI. They're different. We cannot distribute the load to all CPUs with EDK2's MP service. Patch #14 [5] is the commit to enable TDX in MpInitLib. Actually this is to make sure the CpuMpPei/CpuDxe will not break in Td guest in run-time. Patch #14 is rather simple, for example, ApWorker is not supported. 3. In current TDVF implementation, Stack is not set for APs. So C functions cannot be called for APs. If stack is set for APs, then more memories should be reserved in MEMFD. For example, if 1 AP needs 4k size stack, then 15 AP need 60k memory. (We assume only 1+15 CPUs accept memory). This makes things complicated. Based on above considerations, we use the current design that BSP-APs accept memory in SEC phase (in assembly code). [1] https://github.com/tianocore/edk2/blob/master/MdeModulePkg/Core/Dxe/Gcd/Gcd.c#L2245 [2] https://edk2.groups.io/g/devel/message/76367 [3] https://www.intel.com/content/dam/develop/external/us/en/documents/intel-tdx-guest-hypervisor-communication-interface-1.0-344426-002.pdf Section 4.1 [4] https://github.com/mxu9/edk2/commit/4501df794c2c4dbfc5ba63c93a1e2c96660c072e [5] https://github.com/mxu9/edk2/commit/189ad46fb732460eaa3e3f0787f098466b1504a6 Thanks Min -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#83784): https://edk2.groups.io/g/devel/message/83784 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Tue, Nov 16, 2021 at 12:11:33PM +0000, Xu, Min M wrote: > On November 3, 2021 2:31 PM, Gerd Hoffmann wrote: > > Hi, > > > > > - AcceptPages: > > > To mitigate the performance impact of accepting pages in SEC phase on > > > BSP, BSP will parse memory resources and assign each AP the task of > > > accepting a subset of pages. This command may be called several times > > > until all memory resources are processed. In accepting pages, PageLevel > > > may fall back to smaller one if SIZE_MISMATCH error is returned. > > > > Why add an assembler version of this? There already is a C version (in TdxLib, > > patch #2). When adding lazy accept at some point in the future we will stop > > accepting all pages in the SEC phase anyway. There is Mp support (patch #14) > > so you can distribute the load to all CPUs in PEI / DXE phase if you want > > (although the benefits of parallel accept will be much smaller once lazy > > accept is there). > There are below considerations about accept pages in SEC phase. > > 1. There is a minimal memory requirement in DxeCore [1]. In legacy > Ovmf the memory is initialized in PEI phase. Yes. > But TDVF has 2 configurations (Config-A and Config-B) [2]. PEI phase > is skipped in Config-B. So we have to accept memories in SEC phase. I'm sure I've asked this before: Why skip the PEI phase? So far I have not seen any convincing argument for it. Jiewen argued this is a simplification. Which is not completely wrong, but it's also only half the truth. Switching all OVMF builds over to PEI-less boot doesn't work because some features supported by OVMF depend on PEI Modules. Therefore TDX Config-B skipping the PEI phase means we would have to maintain two boot work flows (with and without PEI phase) for OVMF. Which in turn would imply more work for maintenance, testing and so on. Also note that accepting all memory in SEC phase would be temporary only. Once we have support for lazy accept in OVMF we will accept most memory in DXE phase (or leave it to the linux kernel), whereas SEC/PEI will accept only a small fraction of the memory. Just enough to allow DXE phase initialize memory management & lazy accept support. > This is to make the code consistent in Config-A and Config-B. I want TDVF be consistent with the rest of OVMF. Makes long-term maintenance easier. Building a single binary for both SEV and TDX with full confidential computing support (including config-b features) will be easier too. > 2. TDVF's MP support is via Mailbox [3] . BSP wakes up APs by putting > command/parameters in Mailbox. So we have this patch [4]. While > EDK2's MP support (CpuMpPei/CpuDxe) is via INIT-SIPI-SIPI. They're > different. We cannot distribute the load to all CPUs with EDK2's MP > service. > Patch #14 [5] is the commit to enable TDX in MpInitLib. Actually this > is to make sure the CpuMpPei/CpuDxe will not break in Td guest in > run-time. Patch #14 is rather simple, for example, ApWorker is not > supported. Well, MpInitLib seems to have full support (including ApWorker) for SEV. I'd expect you can add TDX support too, and schedule the jobs you want run on the APs via TDX mailbox instead of using IPIs. And I think to support parallel lazy accept in the DXE phase (for lazy accept) you will need proper MpInitLib support anyway. > 3. In current TDVF implementation, Stack is not set for APs. So C > functions cannot be called for APs. If stack is set for APs, then more > memories should be reserved in MEMFD. For example, if 1 AP needs 4k > size stack, then 15 AP need 60k memory. (We assume only 1+15 CPUs > accept memory). This makes things complicated. I think skipping PEI phase and moving stuff to SEC phase makes things complicated. Reserving stacks in MEMFD would only be needed because you can't allocate memory in the SEC phase. When you initialize the APs in PEI instead you can just allocate memory and the MEMFD dependency goes away. > Based on above considerations, we use the current design that BSP-APs > accept memory in SEC phase (in assembly code). I think the design doesn't make much sense for the reasons outlined above. I'd suggest to put aside parallel accept for now and just let the BSP accept the memory for initial TDX support. Focus on adding lazy accept support next. Finally re-evaluate parallel accept support, *after* merging lazy accept. With a major change in the memory acceptance workflow being planned it doesn't look like a good idea to me to tune memory accept performance *now*. My naive expectation would be that parallel accept in SEC/PEI simply isn't worth the effort due to the small amount of memory being needed. Parallel accept in DXE probably is useful, but how much it speeds up boot depends on how much accepted memory we must hand over to the linux kernel so it has enough room to successfully initialize memory management & lazy accept. take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#83810): https://edk2.groups.io/g/devel/message/83810 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
Comment on config-B. > -----Original Message----- > From: Gerd Hoffmann <kraxel@redhat.com> > Sent: Wednesday, November 17, 2021 11:20 PM > To: Xu, Min M <min.m.xu@intel.com> > Cc: devel@edk2.groups.io; Ard Biesheuvel <ardb+tianocore@kernel.org>; Justen, > Jordan L <jordan.l.justen@intel.com>; Brijesh Singh <brijesh.singh@amd.com>; > Erdem Aktas <erdemaktas@google.com>; James Bottomley > <jejb@linux.ibm.com>; Yao, Jiewen <jiewen.yao@intel.com>; Tom Lendacky > <thomas.lendacky@amd.com> > Subject: Re: [PATCH V3 15/29] OvmfPkg: Update SecEntry.nasm to support Tdx > > On Tue, Nov 16, 2021 at 12:11:33PM +0000, Xu, Min M wrote: > > On November 3, 2021 2:31 PM, Gerd Hoffmann wrote: > > > Hi, > > > > > > > - AcceptPages: > > > > To mitigate the performance impact of accepting pages in SEC phase on > > > > BSP, BSP will parse memory resources and assign each AP the task of > > > > accepting a subset of pages. This command may be called several times > > > > until all memory resources are processed. In accepting pages, PageLevel > > > > may fall back to smaller one if SIZE_MISMATCH error is returned. > > > > > > Why add an assembler version of this? There already is a C version (in TdxLib, > > > patch #2). When adding lazy accept at some point in the future we will stop > > > accepting all pages in the SEC phase anyway. There is Mp support (patch #14) > > > so you can distribute the load to all CPUs in PEI / DXE phase if you want > > > (although the benefits of parallel accept will be much smaller once lazy > > > accept is there). > > There are below considerations about accept pages in SEC phase. > > > > 1. There is a minimal memory requirement in DxeCore [1]. In legacy > > Ovmf the memory is initialized in PEI phase. > > Yes. > > > But TDVF has 2 configurations (Config-A and Config-B) [2]. PEI phase > > is skipped in Config-B. So we have to accept memories in SEC phase. > > I'm sure I've asked this before: Why skip the PEI phase? So far > I have not seen any convincing argument for it. [Jiewen] First, keeping or skipping PEI phase is a platform decision, not an architecture decision. Skipping PEI phase is valid architecture design. In EDKII, most platforms choose to include PEI. And we also have multiple platforms skipping PEI phase. For example: https://github.com/tianocore/edk2-platforms/blob/master/Platform/ARM/JunoPkg/ArmJuno.fdf https://github.com/tianocore/edk2-platforms/blob/master/Platform/BeagleBoard/BeagleBoardPkg/BeagleBoardPkg.fdf https://github.com/tianocore/edk2-platforms/blob/master/Platform/Hisilicon/HiKey/HiKey.fdf https://github.com/tianocore/edk2-platforms/blob/master/Platform/Hisilicon/HiKey960/HiKey960.fdf https://github.com/tianocore/edk2-platforms/blob/master/Platform/RaspberryPi/RPi3/RPi3.fdf https://github.com/tianocore/edk2-platforms/blob/master/Platform/RaspberryPi/RPi4/RPi4.fdf Second, the confidential computing changes the threat model completely. One of our goal is to simplify the design for CC-firmware (TDVF) - remove unnecessary modules, remove unnecessary interface, make the image smaller and faster. That will reduce the validation effort, too. That is the main motivation. Third, I have explained this clearly in the original email and review meeting. Because it is a big change, the original maintainer (Laszlo) recommend we use two configuration. Config-A is to keep current architecture, to maximum compatible with OVMF. And we don't remove VMM out of TCB. Config-B is to have a new TDVF design, to maximum satisfy the security requirement. And we remove VMM out of TCB. We agreed, and we are proceeding. > Jiewen argued this is a simplification. Which is not completely wrong, > but it's also only half the truth. Switching all OVMF builds over to > PEI-less boot doesn't work because some features supported by OVMF > depend on PEI Modules. Therefore TDX Config-B skipping the PEI phase > means we would have to maintain two boot work flows (with and without > PEI phase) for OVMF. Which in turn would imply more work for > maintenance, testing and so on. [Jiewen] I am not asking your to OVMF build to PEI-less. But if you want to do, I will not object. As EDKII maintainer, I don't see a burden to maintain multiple boot work flow. We are already doing that on other project. As least for me, I don't see any problem. If you think it is a burden, you may work partial of the feature. EDKII is a big project. Nobody knows everything, including me. I will also rely on other people to handle some features I am not familiar with. I don't see any problem. On contrast, if we keep PEI for config B, it adds extra burden from security assurance perspective. That means, every issue in PEI may be exposed to TDVF. Comparing the effort to maintain the work flow and the effort to handle potential security issue, I would choose to maintain the work flow. I have experience to wait for 1 year embargo to fix EDKII security issue, it is very painful and brings huge burden. > Also note that accepting all memory in SEC phase would be temporary > only. Once we have support for lazy accept in OVMF we will accept most > memory in DXE phase (or leave it to the linux kernel), whereas SEC/PEI > will accept only a small fraction of the memory. Just enough to allow > DXE phase initialize memory management & lazy accept support. > > > This is to make the code consistent in Config-A and Config-B. > > I want TDVF be consistent with the rest of OVMF. Makes long-term > maintenance easier. Building a single binary for both SEV and TDX with > full confidential computing support (including config-b features) will > be easier too. [Jiewen] I am not convinced that TDVF be consist with rest of OVMF. The goal of project is different. The choice can be different. I don't see a reason that one platform must be in this way, just because other platform does in this way. Easy and difficult are very subjective term. I think a PEI-less TDVF is much easier to maintain, comparing with complicated OVMF flow, at least from security perspective. The less code we have, the less issue we have. > > 2. TDVF's MP support is via Mailbox [3] . BSP wakes up APs by putting > > command/parameters in Mailbox. So we have this patch [4]. While > > EDK2's MP support (CpuMpPei/CpuDxe) is via INIT-SIPI-SIPI. They're > > different. We cannot distribute the load to all CPUs with EDK2's MP > > service. > > > Patch #14 [5] is the commit to enable TDX in MpInitLib. Actually this > > is to make sure the CpuMpPei/CpuDxe will not break in Td guest in > > run-time. Patch #14 is rather simple, for example, ApWorker is not > > supported. > > Well, MpInitLib seems to have full support (including ApWorker) for SEV. > I'd expect you can add TDX support too, and schedule the jobs you want > run on the APs via TDX mailbox instead of using IPIs. > > And I think to support parallel lazy accept in the DXE phase (for lazy > accept) you will need proper MpInitLib support anyway. > > > 3. In current TDVF implementation, Stack is not set for APs. So C > > functions cannot be called for APs. If stack is set for APs, then more > > memories should be reserved in MEMFD. For example, if 1 AP needs 4k > > size stack, then 15 AP need 60k memory. (We assume only 1+15 CPUs > > accept memory). This makes things complicated. > > I think skipping PEI phase and moving stuff to SEC phase makes things > complicated. Reserving stacks in MEMFD would only be needed because you > can't allocate memory in the SEC phase. When you initialize the APs in > PEI instead you can just allocate memory and the MEMFD dependency goes > away. > > > Based on above considerations, we use the current design that BSP-APs > > accept memory in SEC phase (in assembly code). > > I think the design doesn't make much sense for the reasons outlined > above. > > I'd suggest to put aside parallel accept for now and just let the BSP > accept the memory for initial TDX support. Focus on adding lazy accept > support next. Finally re-evaluate parallel accept support, *after* > merging lazy accept. > > With a major change in the memory acceptance workflow being planned it > doesn't look like a good idea to me to tune memory accept performance > *now*. > > My naive expectation would be that parallel accept in SEC/PEI simply > isn't worth the effort due to the small amount of memory being needed. > Parallel accept in DXE probably is useful, but how much it speeds up > boot depends on how much accepted memory we must hand over to the linux > kernel so it has enough room to successfully initialize memory > management & lazy accept. > > take care, > Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#83849): https://edk2.groups.io/g/devel/message/83849 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
Hi, > Comment on config-B. > > I'm sure I've asked this before: Why skip the PEI phase? So far > > I have not seen any convincing argument for it. > > Skipping PEI phase is valid architecture design. Sure. > Second, the confidential computing changes the threat model > completely. One of our goal is to simplify the design for CC-firmware > (TDVF) - remove unnecessary modules, remove unnecessary interface, > make the image smaller and faster. That will reduce the validation > effort, too. > > That is the main motivation. That totally makes sense. I expect TDVF Config-B will look alot like the existing AmdSev configuration variant which is stripped down too. No SMM support, no network stack, ... There wouldn't be left much in PEI beside PeiCore and OvmfPkg/PlatformPei. But I don't see how dropping the PEI phase altogether helps much in stripping down the firmware image. The initialization currently handled by OvmfPkg/PlatformPei must happen somewhere else instead. Given SEC is a very restricted environment I don't expect the code can be shared easily, so we will probably end up with code duplication. Also two different boot workflows which I fear can easily introduce subtle bugs due to differences like a initialization order changes. This is what I see as maintenance problem. > Config-A is to keep current architecture, to maximum compatible with > OVMF. And we don't remove VMM out of TCB. > Config-B is to have a new TDVF design, to maximum satisfy the security > requirement. And we remove VMM out of TCB. Sure. config-a is ovmf with tdx support added, all uefi features present, only basic tdx/sev support (basically support memory encryption). config-b is simliar to AmdSev (maybe we'll merge them some day), stripped down uefi feature set (no network etc), full tdx support including attestation etc. I don't want question all that. I still don't see the point in dropping the PEI phase and make config-b work different that all other ovmf variants though. > > Jiewen argued this is a simplification. Which is not completely wrong, > > but it's also only half the truth. Switching all OVMF builds over to > > PEI-less boot doesn't work because some features supported by OVMF > > depend on PEI Modules. Therefore TDX Config-B skipping the PEI phase > > means we would have to maintain two boot work flows (with and without > > PEI phase) for OVMF. Which in turn would imply more work for > > maintenance, testing and so on. > > [Jiewen] I am not asking your to OVMF build to PEI-less. > But if you want to do, I will not object. s3, smm, tpm and maybe more depends on pei modules, so dropping the PEi phase is not an option for a full-featured OVMF build. So I'd very much prefer all ovmf variants (including tdvf) use the PEI phase. > On contrast, if we keep PEI for config B, it adds extra burden from > security assurance perspective. That means, every issue in PEI may be > exposed to TDVF. Same for all other modules used by tdvf. The list of affected PEI modules is rather short though. I think it's only PeiCore and DxeIpl. PlatformPei doesn't count as the code wouldn't go away but would be moved to SEC (and maybe parts of it to DXE). > Comparing the effort to maintain the work flow and the effort to > handle potential security issue, I would choose to maintain the work > flow. I have experience to wait for 1 year embargo to fix EDKII > security issue, it is very painful and brings huge burden. The security workflow is a serious problem indeed. Not only for TDVF, also for OVMF in general, and other platforms too. We should certainly try to improve it. I'm not going to open that discussion in this thread. But let me drop two links two links to osfc talk and workshop (Not 30th + Dec 1st), titled "The firmware supply-chain security is broken: can we fix it?" https://talks.osfc.io/osfc2021/talk/D9X39Z/ https://talks.osfc.io/osfc2021/talk/E9YYJF/ > > I want TDVF be consistent with the rest of OVMF. Makes long-term > > maintenance easier. Building a single binary for both SEV and TDX with > > full confidential computing support (including config-b features) will > > be easier too. > > [Jiewen] I am not convinced that TDVF be consist with rest of OVMF. > The goal of project is different. The choice can be different. > I don't see a reason that one platform must be in this way, just because other platform does in this way. Hmm? Seeing TDVF as "other platform" is a rather strange view given that we are integrating tdx support into OVMF right now ... > I think a PEI-less TDVF is much easier to maintain, comparing with > complicated OVMF flow, at least from security perspective. The less > code we have, the less issue we have. Well, we will have TDX support in the normal OVMF workflow anyway, because we need that for config-a. Why use and maintain something different for config-b? That looks rather pointless to me ... take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#83901): https://edk2.groups.io/g/devel/message/83901 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
> -----Original Message-----
> From: Gerd Hoffmann <kraxel@redhat.com>
> Sent: Friday, November 19, 2021 11:12 PM
> To: Yao, Jiewen <jiewen.yao@intel.com>
> Cc: Xu, Min M <min.m.xu@intel.com>; devel@edk2.groups.io; Ard Biesheuvel
> <ardb+tianocore@kernel.org>; Justen, Jordan L <jordan.l.justen@intel.com>;
> Brijesh Singh <brijesh.singh@amd.com>; Erdem Aktas
> <erdemaktas@google.com>; James Bottomley <jejb@linux.ibm.com>; Tom
> Lendacky <thomas.lendacky@amd.com>
> Subject: Re: [PATCH V3 15/29] OvmfPkg: Update SecEntry.nasm to support Tdx
>
> Hi,
>
> > Comment on config-B.
>
> > > I'm sure I've asked this before: Why skip the PEI phase? So far
> > > I have not seen any convincing argument for it.
> >
> > Skipping PEI phase is valid architecture design.
>
> Sure.
>
> > Second, the confidential computing changes the threat model
> > completely. One of our goal is to simplify the design for CC-firmware
> > (TDVF) - remove unnecessary modules, remove unnecessary interface,
> > make the image smaller and faster. That will reduce the validation
> > effort, too.
> >
> > That is the main motivation.
>
> That totally makes sense. I expect TDVF Config-B will look alot like
> the existing AmdSev configuration variant which is stripped down too.
[Jiewen] I don't think TDVF config-B will be like the AMD SEV is right statement.
TDVF and SEV are two different platforms.
Intel mainly focuses on TDVF and we will let AMD defines the feature set in SEV.
They MAY be alike if possible.
But difference is also acceptable if there is architecture difference or different decision in different company.
> No SMM support, no network stack, ...
>
> There wouldn't be left much in PEI beside PeiCore and
> OvmfPkg/PlatformPei.
>
> But I don't see how dropping the PEI phase altogether helps much in
> stripping down the firmware image. The initialization currently handled
> by OvmfPkg/PlatformPei must happen somewhere else instead. Given SEC is
> a very restricted environment I don't expect the code can be shared
> easily, so we will probably end up with code duplication. Also two
> different boot workflows which I fear can easily introduce subtle bugs
> due to differences like a initialization order changes. This is what I
> see as maintenance problem.
[Jiewen] I don't think this is right statement.
In Tiano history, there were security bugs exposed in PEI phase, even the PEI Core on FV processing.
I do see the value to skip PEI core.
Again, I am very familiar with non-PEI flow.
Back to 10 years ago, I was maintainer of a non-PEI platform (named DUET) and we jumped from SEC to DXE directly.
I don't see any maintenance problem.
>
> > Config-A is to keep current architecture, to maximum compatible with
> > OVMF. And we don't remove VMM out of TCB.
>
> > Config-B is to have a new TDVF design, to maximum satisfy the security
> > requirement. And we remove VMM out of TCB.
>
> Sure.
>
> config-a is ovmf with tdx support added, all uefi features present, only
> basic tdx/sev support (basically support memory encryption).
>
> config-b is simliar to AmdSev (maybe we'll merge them some day),
> stripped down uefi feature set (no network etc), full tdx support
> including attestation etc.
[Jiewen] I think we are debating two different things.
Your statement that "config-B is similar to AmdSev" does not support the statement "config-B should be adopt what AmdSev chooses".
TDVF config-B is proposed by Intel. AMD SEV is proposed by AMD. I respect SEV people and I *may* propose something, but I will ack their decision.
I would not force them to accept the TDVF config-B. And vice versa.
>
> I don't want question all that. I still don't see the point in dropping
> the PEI phase and make config-b work different that all other ovmf
> variants though.
[Jiewen] My point is simple - Threat Model is different.
That causes security objective difference and design difference.
Each CC env owner should have freedom to choose what they want to enforce.
IMHO, they are the policy decision maker.
> > > Jiewen argued this is a simplification. Which is not completely wrong,
> > > but it's also only half the truth. Switching all OVMF builds over to
> > > PEI-less boot doesn't work because some features supported by OVMF
> > > depend on PEI Modules. Therefore TDX Config-B skipping the PEI phase
> > > means we would have to maintain two boot work flows (with and without
> > > PEI phase) for OVMF. Which in turn would imply more work for
> > > maintenance, testing and so on.
> >
> > [Jiewen] I am not asking your to OVMF build to PEI-less.
> > But if you want to do, I will not object.
>
> s3, smm, tpm and maybe more depends on pei modules, so dropping the PEi
> phase is not an option for a full-featured OVMF build. So I'd very much
> prefer all ovmf variants (including tdvf) use the PEI phase.
>
> > On contrast, if we keep PEI for config B, it adds extra burden from
> > security assurance perspective. That means, every issue in PEI may be
> > exposed to TDVF.
>
> Same for all other modules used by tdvf.
>
> The list of affected PEI modules is rather short though. I think it's
> only PeiCore and DxeIpl. PlatformPei doesn't count as the code wouldn't
> go away but would be moved to SEC (and maybe parts of it to DXE).
>
> > Comparing the effort to maintain the work flow and the effort to
> > handle potential security issue, I would choose to maintain the work
> > flow. I have experience to wait for 1 year embargo to fix EDKII
> > security issue, it is very painful and brings huge burden.
>
> The security workflow is a serious problem indeed. Not only for TDVF,
> also for OVMF in general, and other platforms too. We should certainly
> try to improve it.
[Jiewen] If you look at how we state config-A and config-B again, you will find we made difference statement.
I copy it here again.
1) Config-A is to keep current architecture, to maximum compatible with OVMF. And we don't remove VMM out of TCB.
2) Config-B is to have a new TDVF design, to maximum satisfy the security requirement. And we remove VMM out of TCB.
Because of the threat model difference, in config-A, we can safely make some assumption that the VMM is benign and VMM will not input malicious data. As such, we might not perform data validation. We just trust VMM input.
However, in config-B, VMM is malicious, which means we need be careful to NOT trust VMM at any time.
The code in config-A and config-B may do totally different thing to handle the difference situation.
I don't think it is hidden assumption that if TDVF need do some check, then a generic OVMF need do this check.
If OVMF trusts the VMM, the check might be totally unnecessary.
> I'm not going to open that discussion in this thread. But let me drop
> two links two links to osfc talk and workshop (Not 30th + Dec 1st),
> titled "The firmware supply-chain security is broken: can we fix it?"
>
> https://talks.osfc.io/osfc2021/talk/D9X39Z/
> https://talks.osfc.io/osfc2021/talk/E9YYJF/
>
> > > I want TDVF be consistent with the rest of OVMF. Makes long-term
> > > maintenance easier. Building a single binary for both SEV and TDX with
> > > full confidential computing support (including config-b features) will
> > > be easier too.
> >
> > [Jiewen] I am not convinced that TDVF be consist with rest of OVMF.
> > The goal of project is different. The choice can be different.
> > I don't see a reason that one platform must be in this way, just because other
> platform does in this way.
>
> Hmm? Seeing TDVF as "other platform" is a rather strange view given
> that we are integrating tdx support into OVMF right now ...
[Jiewen] This is how Intel views the "platform".
In history, we call this one binary mode is "multiple-platform" or "multiple-SKU".
That means we only have one binary, and this single binary can boot different platforms, which has similar CPU or silicon but different platform board design.
And there will be platform specific code flow, such as
Switch (PlatformId) {
Case PlatformA:
{do platformA init}
Case PlatformB:
{do platformB init}
}
If you treat CC_TYPE to be platformID, it perfectly matches.
Also a platform may has extra module (a driver or an FV) to support the platform specific feature. Or a platform may much simpler design and skip some drivers.
The actual multi-platform design is even more complicated, because we also have group concept. So I will stop the discussion here.
>
> > I think a PEI-less TDVF is much easier to maintain, comparing with
> > complicated OVMF flow, at least from security perspective. The less
> > code we have, the less issue we have.
>
> Well, we will have TDX support in the normal OVMF workflow anyway,
> because we need that for config-a. Why use and maintain something
> different for config-b? That looks rather pointless to me ...
[Jiewen] I think I have explained a lot above.
The key difference between config-a and config-b is threat mode.
I even propose config-a skip PEI phase. I am persuaded to let config-a adopt the OVMF way, because the threat model of config-A is same as the normal OVMF.
But config-B is NOT.
Different threat model drives different solution.
I completely don't understand why they must be same.
If you force me to add PEI to config-B. Finally, that causes TDVF config-B is compromised due to an issue in PEI.
Who will take the responsibility? Will you or RedHat take that?
>
> take care,
> Gerd
-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#83903): https://edk2.groups.io/g/devel/message/83903
Mute This Topic: https://groups.io/mt/86739864/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-
Hi,
> > That totally makes sense. I expect TDVF Config-B will look alot like
> > the existing AmdSev configuration variant which is stripped down too.
>
> [Jiewen] I don't think TDVF config-B will be like the AMD SEV is right statement.
> TDVF and SEV are two different platforms.
Yes, of course. But even though there are differences in both
implementation and supported features both platforms have similar goals
and there is quite some overlap in concepts too.
> Intel mainly focuses on TDVF and we will let AMD defines the feature
> set in SEV. They MAY be alike if possible. But difference is also
> acceptable if there is architecture difference or different decision
> in different company.
Yes. Whenever they are close enough that merging them makes sense
remains to be seen.
> > But I don't see how dropping the PEI phase altogether helps much in
> > stripping down the firmware image. The initialization currently handled
> > by OvmfPkg/PlatformPei must happen somewhere else instead. Given SEC is
> > a very restricted environment I don't expect the code can be shared
> > easily, so we will probably end up with code duplication. Also two
> > different boot workflows which I fear can easily introduce subtle bugs
> > due to differences like a initialization order changes. This is what I
> > see as maintenance problem.
>
> [Jiewen] I don't think this is right statement.
> In Tiano history, there were security bugs exposed in PEI phase, even the PEI Core on FV processing.
> I do see the value to skip PEI core.
On the other hand there are probably more eyes are looking at PEI Core
because it is used by a lot of platforms, increasing the chance that
bugs are actually spotted.
> Again, I am very familiar with non-PEI flow. Back to 10 years ago, I
> was maintainer of a non-PEI platform (named DUET) and we jumped from
> SEC to DXE directly. I don't see any maintenance problem.
The maintenance problem isn't a non-PEI flow. If a platform chooses
that -- fine. The problem having to maintain both PEI and non-PEI
workflow.
> [Jiewen] I think we are debating two different things.
> Your statement that "config-B is similar to AmdSev" does not support
> the statement "config-B should be adopt what AmdSev chooses".
I never stated "config-B should be adopt what AmdSev chooses".
I stated "TDVF boot workflow should be simlar to the other OVMF
platforms to simplify maintenance".
AmdSev is just an example showing that you can easily strip down the
firmware build without putting the boot workflow upside down (which one
of the things config-b wants too).
> > I don't want question all that. I still don't see the point in dropping
> > the PEI phase and make config-b work different that all other ovmf
> > variants though.
>
> [Jiewen] My point is simple - Threat Model is different.
> That causes security objective difference and design difference.
Does not using PEI phase have any advantages from a security point of
view (other than "not using PEI Core code which might have bugs")?
> > The security workflow is a serious problem indeed. Not only for TDVF,
> > also for OVMF in general, and other platforms too. We should certainly
> > try to improve it.
>
> [Jiewen] If you look at how we state config-A and config-B again, you will find we made difference statement.
> I copy it here again.
> 1) Config-A is to keep current architecture, to maximum compatible with OVMF. And we don't remove VMM out of TCB.
> 2) Config-B is to have a new TDVF design, to maximum satisfy the security requirement. And we remove VMM out of TCB.
>
> Because of the threat model difference, in config-A, we can safely
> make some assumption that the VMM is benign and VMM will not input
> malicious data. As such, we might not perform data validation. We just
> trust VMM input.
>
> However, in config-B, VMM is malicious, which means we need be careful to NOT trust VMM at any time.
> The code in config-A and config-B may do totally different thing to handle the difference situation.
>
> I don't think it is hidden assumption that if TDVF need do some check, then a generic OVMF need do this check.
> If OVMF trusts the VMM, the check might be totally unnecessary.
Do you have a concrete example for that?
I can't think of a good reason to skip checks on OVMF. When we -- for
example -- review and improve virtio drivers to make sure they can't be
used by the VMM to exploit a TDVF guest: We surely would use the
improved sanity checks on OVMF too, for better OVMF stability and also
for better test coverage of the sanity checks.
> > Hmm? Seeing TDVF as "other platform" is a rather strange view given
> > that we are integrating tdx support into OVMF right now ...
>
> [Jiewen] This is how Intel views the "platform".
> In history, we call this one binary mode is "multiple-platform" or "multiple-SKU".
> That means we only have one binary, and this single binary can boot different platforms, which has similar CPU or silicon but different platform board design.
> And there will be platform specific code flow, such as
> Switch (PlatformId) {
> Case PlatformA:
> {do platformA init}
> Case PlatformB:
> {do platformB init}
> }
>
> If you treat CC_TYPE to be platformID, it perfectly matches.
Yes. We have that in quite a few places. IoLib for example.
It's required for Config-A, obviously.
So, what is your plan for IoLib for Config-B?
> Also a platform may has extra module (a driver or an FV) to support
> the platform specific feature. Or a platform may much simpler design
> and skip some drivers.
Sure. Config-A will need some drivers from OvmfPkg/AmdSev/ so both SEV
and TDX work, whereas Config-B will not.
> I even propose config-a skip PEI phase.
Care to back your proposal by posting patches to the list?
I think it's easier to discuss the advantages + disadvantages
with concrete code at hand.
> I am persuaded to let config-a adopt the OVMF way, because the threat model of config-A is same as the normal OVMF.
> But config-B is NOT.
> Different threat model drives different solution.
> I completely don't understand why they must be same.
I don't understand why you want separate them. Improving OvmfPkg
security is good for both OVMF and TDVF. For TDVF it is clearly much
more important due to the different threat model, but it is good for
OVMF too. Likewise edk2 in general.
> If you force me to add PEI to config-B. Finally, that causes TDVF config-B is compromised due to an issue in PEI.
> Who will take the responsibility? Will you or RedHat take that?
Bugs happen. There could also be bugs in the additional SEC code you
need for platform setup in a non-PEI configuration ...
take care,
Gerd
-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#83943): https://edk2.groups.io/g/devel/message/83943
Mute This Topic: https://groups.io/mt/86739864/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-
Comment below only:
> I am persuaded to let config-a adopt the OVMF way, because the threat model of config-A is same as the normal OVMF.
> But config-B is NOT.
> Different threat model drives different solution.
> I completely don't understand why they must be same.
I don't understand why you want separate them. Improving OvmfPkg
security is good for both OVMF and TDVF. For TDVF it is clearly much
more important due to the different threat model, but it is good for
OVMF too. Likewise edk2 in general.
[Jiewen] My answer is very clear as I mentioned multiple times.
The threat model is different.
IMHO, talking about "Improving OvmfPkg security" without a clear threat model is *meaningless*.
All mitigation must be based upon threat mode and security objective.
Otherwise, what you are doing might be either unnecessary or insufficient.
> If you force me to add PEI to config-B. Finally, that causes TDVF config-B is compromised due to an issue in PEI.
> Who will take the responsibility? Will you or RedHat take that?
Bugs happen. There could also be bugs in the additional SEC code you
need for platform setup in a non-PEI configuration ...
[Jiewen] I agree. That is why we need smaller code.
We can just focus on review that small portion of code what we have written for TDVF, instead of the whole PEI.
We have process to review and maintain the extra TDVF code.
I think it is totally *unfair* that you force me to add PEI, without knowing the quality of PEI.
Thank you
Yao Jiewen
> -----Original Message-----
> From: Gerd Hoffmann <kraxel@redhat.com>
> Sent: Tuesday, November 23, 2021 8:38 PM
> To: Yao, Jiewen <jiewen.yao@intel.com>
> Cc: Xu, Min M <min.m.xu@intel.com>; devel@edk2.groups.io; Ard Biesheuvel
> <ardb+tianocore@kernel.org>; Justen, Jordan L <jordan.l.justen@intel.com>;
> Brijesh Singh <brijesh.singh@amd.com>; Erdem Aktas
> <erdemaktas@google.com>; James Bottomley <jejb@linux.ibm.com>; Tom
> Lendacky <thomas.lendacky@amd.com>
> Subject: Re: [PATCH V3 15/29] OvmfPkg: Update SecEntry.nasm to support Tdx
>
> Hi,
>
> > > That totally makes sense. I expect TDVF Config-B will look alot like
> > > the existing AmdSev configuration variant which is stripped down too.
> >
> > [Jiewen] I don't think TDVF config-B will be like the AMD SEV is right statement.
> > TDVF and SEV are two different platforms.
>
> Yes, of course. But even though there are differences in both
> implementation and supported features both platforms have similar goals
> and there is quite some overlap in concepts too.
>
> > Intel mainly focuses on TDVF and we will let AMD defines the feature
> > set in SEV. They MAY be alike if possible. But difference is also
> > acceptable if there is architecture difference or different decision
> > in different company.
>
> Yes. Whenever they are close enough that merging them makes sense
> remains to be seen.
>
> > > But I don't see how dropping the PEI phase altogether helps much in
> > > stripping down the firmware image. The initialization currently handled
> > > by OvmfPkg/PlatformPei must happen somewhere else instead. Given SEC is
> > > a very restricted environment I don't expect the code can be shared
> > > easily, so we will probably end up with code duplication. Also two
> > > different boot workflows which I fear can easily introduce subtle bugs
> > > due to differences like a initialization order changes. This is what I
> > > see as maintenance problem.
> >
> > [Jiewen] I don't think this is right statement.
> > In Tiano history, there were security bugs exposed in PEI phase, even the PEI
> Core on FV processing.
> > I do see the value to skip PEI core.
>
> On the other hand there are probably more eyes are looking at PEI Core
> because it is used by a lot of platforms, increasing the chance that
> bugs are actually spotted.
>
> > Again, I am very familiar with non-PEI flow. Back to 10 years ago, I
> > was maintainer of a non-PEI platform (named DUET) and we jumped from
> > SEC to DXE directly. I don't see any maintenance problem.
>
> The maintenance problem isn't a non-PEI flow. If a platform chooses
> that -- fine. The problem having to maintain both PEI and non-PEI
> workflow.
>
> > [Jiewen] I think we are debating two different things.
> > Your statement that "config-B is similar to AmdSev" does not support
> > the statement "config-B should be adopt what AmdSev chooses".
>
> I never stated "config-B should be adopt what AmdSev chooses".
>
> I stated "TDVF boot workflow should be simlar to the other OVMF
> platforms to simplify maintenance".
>
> AmdSev is just an example showing that you can easily strip down the
> firmware build without putting the boot workflow upside down (which one
> of the things config-b wants too).
>
> > > I don't want question all that. I still don't see the point in dropping
> > > the PEI phase and make config-b work different that all other ovmf
> > > variants though.
> >
> > [Jiewen] My point is simple - Threat Model is different.
> > That causes security objective difference and design difference.
>
> Does not using PEI phase have any advantages from a security point of
> view (other than "not using PEI Core code which might have bugs")?
>
> > > The security workflow is a serious problem indeed. Not only for TDVF,
> > > also for OVMF in general, and other platforms too. We should certainly
> > > try to improve it.
> >
> > [Jiewen] If you look at how we state config-A and config-B again, you will find
> we made difference statement.
> > I copy it here again.
> > 1) Config-A is to keep current architecture, to maximum compatible with
> OVMF. And we don't remove VMM out of TCB.
> > 2) Config-B is to have a new TDVF design, to maximum satisfy the security
> requirement. And we remove VMM out of TCB.
> >
> > Because of the threat model difference, in config-A, we can safely
> > make some assumption that the VMM is benign and VMM will not input
> > malicious data. As such, we might not perform data validation. We just
> > trust VMM input.
> >
> > However, in config-B, VMM is malicious, which means we need be careful to
> NOT trust VMM at any time.
> > The code in config-A and config-B may do totally different thing to handle the
> difference situation.
> >
> > I don't think it is hidden assumption that if TDVF need do some check, then a
> generic OVMF need do this check.
> > If OVMF trusts the VMM, the check might be totally unnecessary.
>
> Do you have a concrete example for that?
>
> I can't think of a good reason to skip checks on OVMF. When we -- for
> example -- review and improve virtio drivers to make sure they can't be
> used by the VMM to exploit a TDVF guest: We surely would use the
> improved sanity checks on OVMF too, for better OVMF stability and also
> for better test coverage of the sanity checks.
>
> > > Hmm? Seeing TDVF as "other platform" is a rather strange view given
> > > that we are integrating tdx support into OVMF right now ...
> >
> > [Jiewen] This is how Intel views the "platform".
> > In history, we call this one binary mode is "multiple-platform" or "multiple-
> SKU".
> > That means we only have one binary, and this single binary can boot different
> platforms, which has similar CPU or silicon but different platform board design.
> > And there will be platform specific code flow, such as
> > Switch (PlatformId) {
> > Case PlatformA:
> > {do platformA init}
> > Case PlatformB:
> > {do platformB init}
> > }
> >
> > If you treat CC_TYPE to be platformID, it perfectly matches.
>
> Yes. We have that in quite a few places. IoLib for example.
> It's required for Config-A, obviously.
>
> So, what is your plan for IoLib for Config-B?
>
> > Also a platform may has extra module (a driver or an FV) to support
> > the platform specific feature. Or a platform may much simpler design
> > and skip some drivers.
>
> Sure. Config-A will need some drivers from OvmfPkg/AmdSev/ so both SEV
> and TDX work, whereas Config-B will not.
>
> > I even propose config-a skip PEI phase.
>
> Care to back your proposal by posting patches to the list?
>
> I think it's easier to discuss the advantages + disadvantages
> with concrete code at hand.
>
> > I am persuaded to let config-a adopt the OVMF way, because the threat model
> of config-A is same as the normal OVMF.
> > But config-B is NOT.
> > Different threat model drives different solution.
> > I completely don't understand why they must be same.
>
> I don't understand why you want separate them. Improving OvmfPkg
> security is good for both OVMF and TDVF. For TDVF it is clearly much
> more important due to the different threat model, but it is good for
> OVMF too. Likewise edk2 in general.
>
> > If you force me to add PEI to config-B. Finally, that causes TDVF config-B is
> compromised due to an issue in PEI.
> > Who will take the responsibility? Will you or RedHat take that?
>
> Bugs happen. There could also be bugs in the additional SEC code you
> need for platform setup in a non-PEI configuration ...
>
> take care,
> Gerd
-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#83944): https://edk2.groups.io/g/devel/message/83944
Mute This Topic: https://groups.io/mt/86739864/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-
On Tue, 2021-11-23 at 13:07 +0000, Yao, Jiewen wrote: > Comment below only: > > > I am persuaded to let config-a adopt the OVMF way, because the > > threat model of config-A is same as the normal OVMF. > > But config-B is NOT. > > Different threat model drives different solution. > > I completely don't understand why they must be same. > > I don't understand why you want separate them. Improving OvmfPkg > security is good for both OVMF and TDVF. For TDVF it is clearly much > more important due to the different threat model, but it is good for > OVMF too. Likewise edk2 in general. > > [Jiewen] My answer is very clear as I mentioned multiple times. > The threat model is different. > IMHO, talking about "Improving OvmfPkg security" without a clear > threat model is *meaningless*. > All mitigation must be based upon threat mode and security objective. > Otherwise, what you are doing might be either unnecessary or > insufficient. Can we take a step back and look at the bigger picture. The way EFI is supposed to operate, according to the architecture model: https://uefi.org/sites/default/files/resources/PI_Spec_1_7_final_Jan_2019.pdf (Figure 1 and Table 4) is that SEC is supposed to be as small and compact as possible, so it could be ROM installed without worrying about upgrade issues. It's job is only to get the CPU initialized to the point it can run PEI, measure PEI and transfer control. Security depends on the smallness of SEC because if it's rom installed (as a root of trust ought to be) it can't be upgraded to fix a bug. PEI is supposed to initialize the hardware: set up the CPU, memory Application Processors and board configuration so we're in a known state with described features for DXE. It then measures DXE (only if SEC didn't measure it) and hands off to DXE. PEI code is designed not to interact with anything except setup to minimize external exploitation of internal bugs. DXE is supposed to contain only runtime code, including device drivers. The security point here is that the job of PEI is over as soon as it hands off to DXE, so at that point all the PEI code could be discarded (it usually isn't, but it could be). This strict isolation between DXE and PEI means that once we're in DXE, any bugs in PEI can't be exploited to attack the DXE environment. This allows us to minimize DXE which is where most of the main risk of configuration based exploitation is. In this security model, you increase security by moving as much code as you can from DXE to PEI because the true attack surface is DXE. To a lot of us, cutting out PEI, which requires redistributing code into DXE sounds like an increase not a reduction in the attack surface of the code. You can argue that OVMF doesn't obey the model above and has a lot of initialization code in DXE anyway, but then the correct route would be to fix our PEI/DXE separation to recover the security properties. > > If you force me to add PEI to config-B. Finally, that causes TDVF > > config-B is compromised due to an issue in PEI. > > Who will take the responsibility? Will you or RedHat take that? > > Bugs happen. There could also be bugs in the additional SEC code you > need for platform setup in a non-PEI configuration ... > > [Jiewen] I agree. That is why we need smaller code. > We can just focus on review that small portion of code what we have > written for TDVF, instead of the whole PEI. > We have process to review and maintain the extra TDVF code. This depends ... if you agree with the security model outlined above, bugs in PEI are less of a problem because they can't be exploited. Or do you not agree with this security model? Security isn't about total bug elimination, it's about exploit elimination. You fix a security vulnerability either by fixing the bug that can be exploited or removing the ability to exploit it. This is the reason for technologies like NX ... they don't stop people exploiting bugs to write code to the stack, but they do mean that once you have the code on the stack you can no-longer execute it and the attackers have to move on to other means of gaining control. The great thing about exploit elimination vs bug elimination is the former can usually be done on a whole class of bugs and the latter requires a whack-a-mole per bug type approach. James -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#83947): https://edk2.groups.io/g/devel/message/83947 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
> This strict isolation between DXE and PEI means that once we're in DXE, > any bugs in PEI can't be exploited to attack the DXE environment. [jiewen] I would disagree the statement above. There is not strict isolation. Actually no isolation at all. The DXE is loaded by PEI. A bug in PEI has global impact and it can definitely be used to attack the DXE. thank you! Yao, Jiewen > 在 2021年11月23日,下午10:27,James Bottomley <jejb@linux.ibm.com> 写道: > > On Tue, 2021-11-23 at 13:07 +0000, Yao, Jiewen wrote: >> Comment below only: >> >>> I am persuaded to let config-a adopt the OVMF way, because the >>> threat model of config-A is same as the normal OVMF. >>> But config-B is NOT. >>> Different threat model drives different solution. >>> I completely don't understand why they must be same. >> >> I don't understand why you want separate them. Improving OvmfPkg >> security is good for both OVMF and TDVF. For TDVF it is clearly much >> more important due to the different threat model, but it is good for >> OVMF too. Likewise edk2 in general. >> >> [Jiewen] My answer is very clear as I mentioned multiple times. >> The threat model is different. >> IMHO, talking about "Improving OvmfPkg security" without a clear >> threat model is *meaningless*. >> All mitigation must be based upon threat mode and security objective. >> Otherwise, what you are doing might be either unnecessary or >> insufficient. > > Can we take a step back and look at the bigger picture. The way EFI is > supposed to operate, according to the architecture model: > > https://uefi.org/sites/default/files/resources/PI_Spec_1_7_final_Jan_2019.pdf > (Figure 1 and Table 4) > > is that SEC is supposed to be as small and compact as possible, so it > could be ROM installed without worrying about upgrade issues. It's job > is only to get the CPU initialized to the point it can run PEI, measure > PEI and transfer control. Security depends on the smallness of SEC > because if it's rom installed (as a root of trust ought to be) it can't > be upgraded to fix a bug. > > PEI is supposed to initialize the hardware: set up the CPU, memory > Application Processors and board configuration so we're in a known > state with described features for DXE. It then measures DXE (only if > SEC didn't measure it) and hands off to DXE. PEI code is designed not > to interact with anything except setup to minimize external > exploitation of internal bugs. > > DXE is supposed to contain only runtime code, including device drivers. > > The security point here is that the job of PEI is over as soon as it > hands off to DXE, so at that point all the PEI code could be discarded > (it usually isn't, but it could be). > > This strict isolation between DXE and PEI means that once we're in DXE, > any bugs in PEI can't be exploited to attack the DXE environment. This > allows us to minimize DXE which is where most of the main risk of > configuration based exploitation is. > > In this security model, you increase security by moving as much code as > you can from DXE to PEI because the true attack surface is DXE. > > To a lot of us, cutting out PEI, which requires redistributing code > into DXE sounds like an increase not a reduction in the attack surface > of the code. You can argue that OVMF doesn't obey the model above and > has a lot of initialization code in DXE anyway, but then the correct > route would be to fix our PEI/DXE separation to recover the security > properties. > >>> If you force me to add PEI to config-B. Finally, that causes TDVF >>> config-B is compromised due to an issue in PEI. >>> Who will take the responsibility? Will you or RedHat take that? >> >> Bugs happen. There could also be bugs in the additional SEC code you >> need for platform setup in a non-PEI configuration ... >> >> [Jiewen] I agree. That is why we need smaller code. >> We can just focus on review that small portion of code what we have >> written for TDVF, instead of the whole PEI. >> We have process to review and maintain the extra TDVF code. > > This depends ... if you agree with the security model outlined above, > bugs in PEI are less of a problem because they can't be exploited. Or > do you not agree with this security model? > > Security isn't about total bug elimination, it's about exploit > elimination. You fix a security vulnerability either by fixing the bug > that can be exploited or removing the ability to exploit it. This is > the reason for technologies like NX ... they don't stop people > exploiting bugs to write code to the stack, but they do mean that once > you have the code on the stack you can no-longer execute it and the > attackers have to move on to other means of gaining control. > > The great thing about exploit elimination vs bug elimination is the > former can usually be done on a whole class of bugs and the latter > requires a whack-a-mole per bug type approach. > > James > > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#83948): https://edk2.groups.io/g/devel/message/83948 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Tue, 2021-11-23 at 14:36 +0000, Yao, Jiewen wrote: > > This strict isolation between DXE and PEI means that once we're in > > DXE, any bugs in PEI can't be exploited to attack the DXE > > environment. > > [jiewen] I would disagree the statement above. > There is not strict isolation. Actually no isolation at all. > The DXE is loaded by PEI. Not in OVMF ... DXE and PEI are actually loaded by SEC. PEI eventually jumps to execute DXE but that's after all its own tasks are completed. > A bug in PEI has global impact and it can definitely be used to > attack the DXE. Only if it can be exploited. Moving things to PEI is mitigating the exploitability not the bugs. The point about exploitability and PEI is that it doesn't read any config files, it can't execute any EFI binaries and it has no Human Interface modules so can't be influenced even by a physically present attacker. No ability to influence is what removes the ability to exploit. James -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#83949): https://edk2.groups.io/g/devel/message/83949 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
I would say the PEI owns the system and all memory (including the DXE). A bug in PEI may override the loaded DXE memory or the whole system. In history I did see PEI security issues. Some security issue in PEI caused system compromised completely. You even have no chance to run DXE. thank you! Yao, Jiewen > 在 2021年11月23日,下午10:52,James Bottomley <jejb@linux.ibm.com> 写道: > > On Tue, 2021-11-23 at 14:36 +0000, Yao, Jiewen wrote: >>> This strict isolation between DXE and PEI means that once we're in >>> DXE, any bugs in PEI can't be exploited to attack the DXE >>> environment. >> >> [jiewen] I would disagree the statement above. >> There is not strict isolation. Actually no isolation at all. >> The DXE is loaded by PEI. > > Not in OVMF ... DXE and PEI are actually loaded by SEC. PEI eventually > jumps to execute DXE but that's after all its own tasks are completed. > >> A bug in PEI has global impact and it can definitely be used to >> attack the DXE. > > Only if it can be exploited. Moving things to PEI is mitigating the > exploitability not the bugs. The point about exploitability and PEI is > that it doesn't read any config files, it can't execute any EFI > binaries and it has no Human Interface modules so can't be influenced > even by a physically present attacker. No ability to influence is what > removes the ability to exploit. > > James > > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#83950): https://edk2.groups.io/g/devel/message/83950 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Tue, 2021-11-23 at 15:10 +0000, Yao, Jiewen wrote: > I would say the PEI owns the system and all memory (including the > DXE). > > A bug in PEI may override the loaded DXE memory or the whole system. That's not the correct way to analyse the security properties. From the security point of view this is a trapdoor system: once you go through the door, you can't go back (the trapdoor being the jump from PEI to DXE). The trapdoor isolates the domains and allows you to analyse the security properties of each separately. It also allows separation of exposure ... which is what we use in this case: the PEI domain has very limited exposure, it's the DXE domain that has full exposure but, because of the trapdoor, bugs in PEI code can't be used to exploit the system when it has transitioned to the DXE domain. > In history I did see PEI security issues. > Some security issue in PEI caused system compromised completely. You > even have no chance to run DXE. The security domain analysis above doesn't mean no bug in PEI is ever exploitable but it does mean that there are fewer exploitability classes in PEI than DXE because the security domain is much less exposed. James -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#83951): https://edk2.groups.io/g/devel/message/83951 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
I think we are discussing under different context. First, the term "isolation" shall be clarified. In my context, "isolation" means two domain cannot impact each other. The isolation is enforced by a 3rd higher privileged component. Examples: Ring3 apps are isolated by OS. TDs are isolated by TDX Module. That is why I say: there is no isolation. In your context, if one domain jumps to another domain and never jump back, then you call it "isolation". Second, in EDKII, we have similar concept - we call trust region (TR): 1) Recovery Trust Region (PEI) 2) Main Trust Region (DXE-before EndOfDxe) 3) MM Trust Region (SMM) 4) Boot Trust Region (DXE w/o CSM-after EndOfDxe) 5) Legacy Trust Region (DXE with CSM-after EndOfDxe) 6) OS Trust Region (OS Boot) We use term "transition" to describe the domain jump. We don’t use term "isolation". We use "isolation" where two co-existed RT cannot tamper each other. For example, MM trust region and Boot Trust Region are isolated. Actually, the only isolation example we have in BIOS is x86 SMM or ARM TrustZone. We have below security assumption: 1) What can be trusted? The later launched TR can trust the earlier TR. Here "trust" means "use data input without validation" For example: 1.1) Main TR can trust the input from Recovery TR. 1.2) MM RT can trust the input from Main TR. 2) What cannot be trusted? Here "not trust" means "validate data input before use " For example: 2.1) MM RT cannot trust the input from Boot TR. 2.2) Recovery RT cannot trust the input from Boot TR. However, TR just means a region definition to help us do security analysis. It is NOT related to any security exposure, severity, or exploitability. There is no conclusion that a bug in PEI is more or less exploitable than DXE or SMM. Here, I have comment for the sentence below: 1. " the PEI domain has very limited exposure, it's the DXE domain that has full exposure " [Jiewen] I don’t understand how that is concluded, on " limited exposure ", " full exposure ". 2. "bugs in PEI code can't be used to exploit the system when it has transitioned to the DXE domain." [Jiewen] I disagree. A bug in PEI code may already modify the HOB, while the HOB is an architecture data input for DXE. If DXE relies on some malicious data from PEI, DXE might be exploited later. 3. " but it does mean that there are fewer exploitability classes in PEI than DXE because the security domain is much less exposed." [Jiewen] I don’t understand how that is concluded, on "fewer", "less". In history, there are security bugs in PEI and there are security bugs in DXE. I won't say fewer or less. Also because we use *LOCK* mechanism, and some LOCKs are enforced in PEI phase. A bug in PEI might be more severe than a bug in DXE. Hi James Sorry, I am a little lost now. To be honest, I am not sure what is objection on the discussion. Are you question the general threat model analysis on UEFI PI architecture? Or are you trying to persuade me we should include PEI in TDVF, because you think it is safer to add code in PEI ? Or something else? Please enlighten me that. Thank you Yao, Jiewen > -----Original Message----- > From: James Bottomley <jejb@linux.ibm.com> > Sent: Tuesday, November 23, 2021 11:38 PM > To: devel@edk2.groups.io; Yao, Jiewen <jiewen.yao@intel.com> > Cc: Gerd Hoffmann <kraxel@redhat.com>; Xu, Min M <min.m.xu@intel.com>; > Ard Biesheuvel <ardb+tianocore@kernel.org>; Justen, Jordan L > <jordan.l.justen@intel.com>; Brijesh Singh <brijesh.singh@amd.com>; Erdem > Aktas <erdemaktas@google.com>; Tom Lendacky <thomas.lendacky@amd.com> > Subject: Re: [edk2-devel] [PATCH V3 15/29] OvmfPkg: Update SecEntry.nasm to > support Tdx > > On Tue, 2021-11-23 at 15:10 +0000, Yao, Jiewen wrote: > > I would say the PEI owns the system and all memory (including the > > DXE). > > > > A bug in PEI may override the loaded DXE memory or the whole system. > > That's not the correct way to analyse the security properties. From > the security point of view this is a trapdoor system: once you go > through the door, you can't go back (the trapdoor being the jump from > PEI to DXE). The trapdoor isolates the domains and allows you to > analyse the security properties of each separately. It also allows > separation of exposure ... which is what we use in this case: the PEI > domain has very limited exposure, it's the DXE domain that has full > exposure but, because of the trapdoor, bugs in PEI code can't be used > to exploit the system when it has transitioned to the DXE domain. > > > In history I did see PEI security issues. > > Some security issue in PEI caused system compromised completely. You > > even have no chance to run DXE. > > The security domain analysis above doesn't mean no bug in PEI is ever > exploitable but it does mean that there are fewer exploitability > classes in PEI than DXE because the security domain is much less > exposed. > > James > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84010): https://edk2.groups.io/g/devel/message/84010 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
Hi, > 1. " the PEI domain has very limited exposure, it's the DXE domain that has full exposure " > [Jiewen] I don’t understand how that is concluded, on " limited exposure ", " full exposure ". exposure == "the need to process external input, which an attacker might use to exploit bugs in edk2 by crafting input data accordingly." There isn't much external input to process in PEI phase. Virtual machines are a bit different than physical machines. They need to process some input from the host here which describes the virtual hardware so they can initialize it properly. For example parse the etc/e820 fw_cfg file to figure how much memory is installed (or parse the td hob in case tdx is used). That platform-specific code for virtual machine initialization must do careful sanity checking when you don't want trust the VMM of course. Whenever that code lives in SEC or PEI doesn't change the picture much though. > 2. "bugs in PEI code can't be used to exploit the system when it has transitioned to the DXE domain." > [Jiewen] I disagree. A bug in PEI code may already modify the HOB, while the HOB is an architecture data input for DXE. > If DXE relies on some malicious data from PEI, DXE might be exploited later. Attacking PEI is harder though because the external input it processes is limited when compared to DXE. Once you are transitioned to DXE you can't call into PEI Modules any more. So, how would an attacker trick PEI code into modifying HOBs (or carrying out other actions under attackers control)? take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84021): https://edk2.groups.io/g/devel/message/84021 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
> -----Original Message----- > From: Gerd Hoffmann <kraxel@redhat.com> > Sent: Wednesday, November 24, 2021 4:12 PM > To: Yao, Jiewen <jiewen.yao@intel.com> > Cc: jejb@linux.ibm.com; devel@edk2.groups.io; Xu, Min M > <min.m.xu@intel.com>; Ard Biesheuvel <ardb+tianocore@kernel.org>; Justen, > Jordan L <jordan.l.justen@intel.com>; Brijesh Singh <brijesh.singh@amd.com>; > Erdem Aktas <erdemaktas@google.com>; Tom Lendacky > <thomas.lendacky@amd.com> > Subject: Re: [edk2-devel] [PATCH V3 15/29] OvmfPkg: Update SecEntry.nasm to > support Tdx > > Hi, > > > 1. " the PEI domain has very limited exposure, it's the DXE domain that has full > exposure " > > [Jiewen] I don’t understand how that is concluded, on " limited exposure ", " > full exposure ". > > exposure == "the need to process external input, which an attacker might > use to exploit bugs in edk2 by crafting input data accordingly." [Jiewen] Thanks. > > There isn't much external input to process in PEI phase. Virtual > machines are a bit different than physical machines. They need to > process some input from the host here which describes the virtual > hardware so they can initialize it properly. For example parse the > etc/e820 fw_cfg file to figure how much memory is installed (or parse > the td hob in case tdx is used). > > That platform-specific code for virtual machine initialization must do > careful sanity checking when you don't want trust the VMM of course. > Whenever that code lives in SEC or PEI doesn't change the picture much > though. [Jiewen] I am not sure what information you are trying to convey. I never said that TDVF don’t need check the input after remove PEI. The check is always needed, no matter in PEI or SEC. I totally agree. However, what I want to say is PEI has much more unnecessary code than SEC. You never know the quality of the those unnecessary code. Maybe it has a security bug, but it is just not triggered. For example, can you guarantee PEI code has not bug? Or DXE IPL has not bug? What I did is a process of risk management - if PEI is removed, I don’t need care the risk brought by PEI. Unless you can prove that the risk of PEI is zero, then the risk is there. > > > 2. "bugs in PEI code can't be used to exploit the system when it has > transitioned to the DXE domain." > > [Jiewen] I disagree. A bug in PEI code may already modify the HOB, while the > HOB is an architecture data input for DXE. > > If DXE relies on some malicious data from PEI, DXE might be exploited later. > > Attacking PEI is harder though because the external input it > processes is limited when compared to DXE. > > Once you are transitioned to DXE you can't call into PEI Modules > any more. > > So, how would an attacker trick PEI code into modifying HOBs (or > carrying out other actions under attackers control)? [Jiewen] I would disagree " Attacking PEI is harder though because the external input it processes is limited when compared to DXE " First, the number of extern input != the difficulty of the attack. Second, the number of extern input != the severity of the consequence. On how to trick PEI, if you search the public UEFI BIOS attack history in the web, you can find example. The attack simply used an integer overflow, to cause buffer overflow. And the buffer overflow can be used to inject shellcode, then gain the execution privilege. Finally, it can control the whole system. Modifying HOB is not a difficult task, as long as there is proper vulnerability, being used to gain a memory write. In security world, we always say: Even if you cannot do it, you cannot assume other people cannot do it. Unless you can prove it cannot be done. Otherwise, anything might be possible. That is why people are in favor of crypto notation and formal verification to prove something is correct. > > take care, > Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84028): https://edk2.groups.io/g/devel/message/84028 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Wed, 2021-11-24 at 11:08 +0000, Yao, Jiewen wrote: > > -----Original Message----- > > From: Gerd Hoffmann <kraxel@redhat.com> [...] > > There isn't much external input to process in PEI phase. Virtual > > machines are a bit different than physical machines. They need to > > process some input from the host here which describes the virtual > > hardware so they can initialize it properly. For example parse the > > etc/e820 fw_cfg file to figure how much memory is installed (or > > parse the td hob in case tdx is used). > > > > That platform-specific code for virtual machine initialization must > > do careful sanity checking when you don't want trust the VMM of > > course. Whenever that code lives in SEC or PEI doesn't change the > > picture much though. I don't disagree on this because we don't have a rom root of trust in OVMF ... however it matters for most of the rest of edk2 > [Jiewen] I am not sure what information you are trying to convey. > I never said that TDVF don’t need check the input after remove PEI. > The check is always needed, no matter in PEI or SEC. I totally agree. > > However, what I want to say is PEI has much more unnecessary code > than SEC. > You never know the quality of the those unnecessary code. Maybe it > has a security bug, but it is just not triggered. > For example, can you guarantee PEI code has not bug? Or DXE IPL has > not bug? Code removal to thin down the image is orthogonal to the location of that code ... I don't think anyone objected to securing the path through PEI by reducing unnecessary code; the doubt is that the elimination of PEI somehow improves security. > > What I did is a process of risk management - if PEI is removed, I > don’t need care the risk brought by PEI. Well, as I said above, you can remove the unnecessary code in PEI (and SEC and DXE). However, once you've done that, you don't eliminate risk by removing PEI because all you do is move the necessary code that was in PEI to somewhere else. If you move it to SEC, I agree with Gerd that it doesn't alter the security position much because SEC is a low exposure domain like PEI. If you move it to DXE it actually decreases your security measurably because DXE is a high exposure security domain. > Unless you can prove that the risk of PEI is zero, then the risk is > there. But you don't make it zero by eliminating PEI, you simply move the risk elsewhere because the necessary code still has to execute. > > > 2. "bugs in PEI code can't be used to exploit the system when it > > > has transitioned to the DXE domain." > > > [Jiewen] I disagree. A bug in PEI code may already modify the > > > HOB, while the HOB is an architecture data input for DXE. > > > If DXE relies on some malicious data from PEI, DXE might be > > > exploited later. > > > > Attacking PEI is harder though because the external input it > > processes is limited when compared to DXE. > > > > Once you are transitioned to DXE you can't call into PEI Modules > > any more. > > > > So, how would an attacker trick PEI code into modifying HOBs (or > > carrying out other actions under attackers control)? > > [Jiewen] I would disagree " Attacking PEI is harder though because > the external input it processes is limited when compared to DXE " > First, the number of extern input != the difficulty of the attack. > Second, the number of extern input != the severity of the > consequence. Attacking PEI is harder than attacking DXE because of the limited exposure to external influences. It doesn't mean it can't be done but it does mean exploiting the same code can be harder or impossible in PEI compared to DXE. The key point is there are some classes of bug that can't be exploited in PEI because of the limited exposure. These bugs can be exploited in DXE because of the wider exposure. This is why moving code from DXE to PEI increases the risk. > On how to trick PEI, if you search the public UEFI BIOS attack > history in the web, you can find example. > The attack simply used an integer overflow, to cause buffer overflow. > And the buffer overflow can be used to inject shellcode, then gain > the execution privilege. > Finally, it can control the whole system. > > Modifying HOB is not a difficult task, as long as there is proper > vulnerability, being used to gain a memory write. > > In security world, we always say: Even if you cannot do it, you > cannot assume other people cannot do it. > Unless you can prove it cannot be done. Otherwise, anything might be > possible. > > That is why people are in favor of crypto notation and formal > verification to prove something is correct. There are many ways of improving security. Formal verification has its place, but grows exponentially harder with the complexity of the system. Separation into multiple security domains is also a common technique (which also facilitates formal verification because it reduces the state space). What I worry about is that elimination of one of the UEFI security domains causes code to move to places that makes it more exploitable. James -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84038): https://edk2.groups.io/g/devel/message/84038 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
James I am sorry that it is hard for me to understand your point. To be honest, I am not sure what is objective on the discussion. Are you question the general threat model analysis on UEFI PI architecture? Or are you trying to persuade me we should include PEI in TDVF, because you think it is safer to add code in PEI ? Or something else? Please enlighten me that. Thank you Yao Jiewen > -----Original Message----- > From: devel@edk2.groups.io <devel@edk2.groups.io> On Behalf Of James > Bottomley > Sent: Wednesday, November 24, 2021 9:35 PM > To: devel@edk2.groups.io; Yao, Jiewen <jiewen.yao@intel.com>; Gerd > Hoffmann <kraxel@redhat.com> > Cc: Xu, Min M <min.m.xu@intel.com>; Ard Biesheuvel > <ardb+tianocore@kernel.org>; Justen, Jordan L <jordan.l.justen@intel.com>; > Brijesh Singh <brijesh.singh@amd.com>; Erdem Aktas > <erdemaktas@google.com>; Tom Lendacky <thomas.lendacky@amd.com> > Subject: Re: [edk2-devel] [PATCH V3 15/29] OvmfPkg: Update SecEntry.nasm to > support Tdx > > On Wed, 2021-11-24 at 11:08 +0000, Yao, Jiewen wrote: > > > -----Original Message----- > > > From: Gerd Hoffmann <kraxel@redhat.com> > [...] > > > There isn't much external input to process in PEI phase. Virtual > > > machines are a bit different than physical machines. They need to > > > process some input from the host here which describes the virtual > > > hardware so they can initialize it properly. For example parse the > > > etc/e820 fw_cfg file to figure how much memory is installed (or > > > parse the td hob in case tdx is used). > > > > > > That platform-specific code for virtual machine initialization must > > > do careful sanity checking when you don't want trust the VMM of > > > course. Whenever that code lives in SEC or PEI doesn't change the > > > picture much though. > > I don't disagree on this because we don't have a rom root of trust in > OVMF ... however it matters for most of the rest of edk2 > > > [Jiewen] I am not sure what information you are trying to convey. > > I never said that TDVF don’t need check the input after remove PEI. > > The check is always needed, no matter in PEI or SEC. I totally agree. > > > > However, what I want to say is PEI has much more unnecessary code > > than SEC. > > You never know the quality of the those unnecessary code. Maybe it > > has a security bug, but it is just not triggered. > > For example, can you guarantee PEI code has not bug? Or DXE IPL has > > not bug? > > Code removal to thin down the image is orthogonal to the location of > that code ... I don't think anyone objected to securing the path > through PEI by reducing unnecessary code; the doubt is that the > elimination of PEI somehow improves security. > > > > > What I did is a process of risk management - if PEI is removed, I > > don’t need care the risk brought by PEI. > > Well, as I said above, you can remove the unnecessary code in PEI (and > SEC and DXE). However, once you've done that, you don't eliminate risk > by removing PEI because all you do is move the necessary code that was > in PEI to somewhere else. If you move it to SEC, I agree with Gerd > that it doesn't alter the security position much because SEC is a low > exposure domain like PEI. If you move it to DXE it actually decreases > your security measurably because DXE is a high exposure security > domain. > > > Unless you can prove that the risk of PEI is zero, then the risk is > > there. > > But you don't make it zero by eliminating PEI, you simply move the risk > elsewhere because the necessary code still has to execute. > > > > > 2. "bugs in PEI code can't be used to exploit the system when it > > > > has transitioned to the DXE domain." > > > > [Jiewen] I disagree. A bug in PEI code may already modify the > > > > HOB, while the HOB is an architecture data input for DXE. > > > > If DXE relies on some malicious data from PEI, DXE might be > > > > exploited later. > > > > > > Attacking PEI is harder though because the external input it > > > processes is limited when compared to DXE. > > > > > > Once you are transitioned to DXE you can't call into PEI Modules > > > any more. > > > > > > So, how would an attacker trick PEI code into modifying HOBs (or > > > carrying out other actions under attackers control)? > > > > [Jiewen] I would disagree " Attacking PEI is harder though because > > the external input it processes is limited when compared to DXE " > > First, the number of extern input != the difficulty of the attack. > > Second, the number of extern input != the severity of the > > consequence. > > Attacking PEI is harder than attacking DXE because of the limited > exposure to external influences. It doesn't mean it can't be done but > it does mean exploiting the same code can be harder or impossible in > PEI compared to DXE. > > The key point is there are some classes of bug that can't be exploited > in PEI because of the limited exposure. These bugs can be exploited in > DXE because of the wider exposure. This is why moving code from DXE to > PEI increases the risk. > > > On how to trick PEI, if you search the public UEFI BIOS attack > > history in the web, you can find example. > > The attack simply used an integer overflow, to cause buffer overflow. > > And the buffer overflow can be used to inject shellcode, then gain > > the execution privilege. > > Finally, it can control the whole system. > > > > Modifying HOB is not a difficult task, as long as there is proper > > vulnerability, being used to gain a memory write. > > > > In security world, we always say: Even if you cannot do it, you > > cannot assume other people cannot do it. > > Unless you can prove it cannot be done. Otherwise, anything might be > > possible. > > > > That is why people are in favor of crypto notation and formal > > verification to prove something is correct. > > There are many ways of improving security. Formal verification has its > place, but grows exponentially harder with the complexity of the > system. Separation into multiple security domains is also a common > technique (which also facilitates formal verification because it > reduces the state space). What I worry about is that elimination of > one of the UEFI security domains causes code to move to places that > makes it more exploitable. > > James > > > > > > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84039): https://edk2.groups.io/g/devel/message/84039 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Wed, 2021-11-24 at 14:03 +0000, Yao, Jiewen wrote: > James > I am sorry that it is hard for me to understand your point. > > To be honest, I am not sure what is objective on the discussion. > Are you question the general threat model analysis on UEFI PI > architecture? The object is for me to understand why you think eliminating PEI improves security because I think it moves it in the opposite direction. > Or are you trying to persuade me we should include PEI in TDVF, > because you think it is safer to add code in PEI ? > Or something else? > > Please enlighten me that. Somewhere a decision was taken to remove PEI from the OVMF that is used to bring up TDX on the grounds of "improving security". I'm struggling to understand the rationale for this. James -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84040): https://edk2.groups.io/g/devel/message/84040 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
OK. Got it.
Let me explain it in more detail.
Let's assume PEI phase include 3 major classes {PEI Core, PEI Arch PEM*, Feature X*}. * means 0~multiple.
To all of us what really matter is Feature X, the existence of PEI Core + PEI Arch PEIM* is to support Feature X*.
From architecture perspective, if a platform is complex (e.g. there are lots of Feature X*) and feature X* have lots of inter-dependency, then PEI is a good place to coordinate the Feature X*. (Example, Feature X* are memory init and silicon init)
But if a platform simple (e.g. there is only few Feature X*) and feature X* have no much dependency, the including PEI does not bring too much value. That is why you see multiple platforms in EDKII does not include PEI.
From security perspective, Feature X* shall always perform check, no matter where the feature X sits in SEC, PEI or DXE. The risk of Feature X always exists, no matter where the feature X sits in SEC, PEI or DXE. I completely agree.
At same time, the PEI Core + PEI Arch PEIM* also bring unknown security risk. That was TRUE in history. It did happen. So my motivation to remove PEI phase is to reduce the risk introduced by PEI Core + PEI Arch PEIM*. Again, I do not mean to reduce the risk introduced by Feature X.
Now it seems we are really debating two things: (please correct me if I am wrong)
1) What is risk introduced by PEI Core + PEI Arch PEIM* ?
2) What is the delta of risk by moving Feature X from PEI to other place (SEC or DXE) ?
For 1), my answer is that the risk is definitely bigger than zero, based upon history data. (This is an objective answer.) That is the main of my motivation to make it become zero by removing PEI.
For 2), my answer is that the delta is almost 0, based upon my experience. (I admit this is a subjective answer, because I cannot prove.). We are trying our best to reduce the risk of the Feature A* as well. Assuming delta of risk <= risk, then it will become very smaller.
So, my judgement is by removing PEI, we can reduce the risk introduce by PEI Core + PEI Arch PEIM*. Reducing code == Reducing Security Risk.
Also, this gives us a chance to focus on reviewing Feature X itself, instead of the complex interaction with PEI Core + PEI Arch PEIM*. Reducing complexity == Reducing Security Risk.
(In history, we got lots of complain on the complexity of the non-deterministic flow by CALLBACK and NOTIFY function in Core. A feature developer might not have idea on when the code will be called, and what the system status is at that moment.)
Thank you
Yao Jiewen
> -----Original Message-----
> From: devel@edk2.groups.io <devel@edk2.groups.io> On Behalf Of James
> Bottomley
> Sent: Wednesday, November 24, 2021 10:07 PM
> To: Yao, Jiewen <jiewen.yao@intel.com>; devel@edk2.groups.io; Gerd
> Hoffmann <kraxel@redhat.com>
> Cc: Xu, Min M <min.m.xu@intel.com>; Ard Biesheuvel
> <ardb+tianocore@kernel.org>; Justen, Jordan L <jordan.l.justen@intel.com>;
> Brijesh Singh <brijesh.singh@amd.com>; Erdem Aktas
> <erdemaktas@google.com>; Tom Lendacky <thomas.lendacky@amd.com>
> Subject: Re: [edk2-devel] [PATCH V3 15/29] OvmfPkg: Update SecEntry.nasm to
> support Tdx
>
> On Wed, 2021-11-24 at 14:03 +0000, Yao, Jiewen wrote:
> > James
> > I am sorry that it is hard for me to understand your point.
> >
> > To be honest, I am not sure what is objective on the discussion.
> > Are you question the general threat model analysis on UEFI PI
> > architecture?
>
> The object is for me to understand why you think eliminating PEI
> improves security because I think it moves it in the opposite
> direction.
>
> > Or are you trying to persuade me we should include PEI in TDVF,
> > because you think it is safer to add code in PEI ?
> > Or something else?
> >
> > Please enlighten me that.
>
> Somewhere a decision was taken to remove PEI from the OVMF that is used
> to bring up TDX on the grounds of "improving security". I'm struggling
> to understand the rationale for this.
>
> James
>
>
>
>
>
>
-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#84041): https://edk2.groups.io/g/devel/message/84041
Mute This Topic: https://groups.io/mt/86739864/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-
Hi, > So, my judgement is by removing PEI, we can reduce the risk introduce > by PEI Core + PEI Arch PEIM*. Reducing code == Reducing Security Risk. Yes, PEI Core goes away. No, PEI Arch PEIM (aka OvmfPkg/PlatformPei) wouldn't go away, you would only move the code to SEC or DXE phase, the platform initialization has to happen somewhere. Moving code to DXE has its problems as outlines by James at length. Moving code to SEC has its problems too. SEC is a much more restricted environment. A direct consequence is that you have re-invented multiprocessor job scheduling (using tdx mailbox) instead of using standard mp service for parallel accept. I do not account that as "reducing complexity". And I've not yet seen the other changes you have done for pei-less tdvf ... take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84081): https://edk2.groups.io/g/devel/message/84081 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
> -----Original Message----- > From: devel@edk2.groups.io <devel@edk2.groups.io> On Behalf Of Gerd > Hoffmann > Sent: Thursday, November 25, 2021 4:32 PM > To: Yao, Jiewen <jiewen.yao@intel.com> > Cc: devel@edk2.groups.io; jejb@linux.ibm.com; Xu, Min M > <min.m.xu@intel.com>; Ard Biesheuvel <ardb+tianocore@kernel.org>; Justen, > Jordan L <jordan.l.justen@intel.com>; Brijesh Singh <brijesh.singh@amd.com>; > Erdem Aktas <erdemaktas@google.com>; Tom Lendacky > <thomas.lendacky@amd.com> > Subject: Re: [edk2-devel] [PATCH V3 15/29] OvmfPkg: Update SecEntry.nasm to > support Tdx > > Hi, > > > So, my judgement is by removing PEI, we can reduce the risk introduce > > by PEI Core + PEI Arch PEIM*. Reducing code == Reducing Security Risk. > > Yes, PEI Core goes away. > > No, PEI Arch PEIM (aka OvmfPkg/PlatformPei) wouldn't go away, you would > only move the code to SEC or DXE phase, the platform initialization has > to happen somewhere. [Jiewen] "Arch PEIM" refers to the PEIM required by the PEI Core. The "platform initialization" is FeatureX. It is something you have to do, no mater you have PEI or non-PEI. > Moving code to DXE has its problems as outlines by James at length. [Jiewen] I don't think this statement is right, with assumption that your interpretation for James' "exposure" is correct: "Exposure == External Interface", and assumption that we are discussing general design principle. I give my reason below: 1) From architecture perspective. Please refer to PI specification 1.7A (https://uefi.org/sites/default/files/resources/PI_Spec_1_7_A_final_May1.pdf) Section 2.1 - Introduction. "Philosophically, the PEI phase is intended to be the thinnest amount of code to achieve the ends listed above. As such, any more sophisticated algorithms or processing should be deferred to the DXE phase of execution." In history, the first EFI 1.02 implementation does not have PEI. PEI was added, just because the EFI need permanent RAM, while it is not TRUE for the platform that requires DRAM init. If the DRAM init simple, then you don't PEI, because it can be done in SEC. PEI was invented, because the memory init becomes more and more complicated and has more dependency. We want to have a better architecture to support that. The *intention* of PEI is *only* to initialize the environment for the EFI. That is why it is called as "Pre-EFI-initialization". This is very similar to the coreboot - ROM stage and RAM stage. (You can treat PEI as ROM stage and DXE as RAM stage) PEI should only include memory init (biggest part) and related code. If you don't have a strong reason to put a feature to PEI, then you had better to put it to DXE, according to the architecture direction. 2) From security perspective. I am not convinced that "exposure (external interface)" is a good reason to decide where the module should be. Thinking of this, if you prefer to put a module to the PEI because PEI has *less* exposure, then PEI will have *more* exposure after that. If you move half of DXE features to PEI, then PEI has same exposure as DXE. What benefit we can get? In EDKII, the general security guideline is that module location should be based upon "privilege", following the security principle - "least privilege". If a module does not requires high privilege, then it should be put to lower privilege location. In one of my email, I listed Trust Region (TR) concept. I copy them here again. (I added SEC/BDS/RT to them as well, because I miss that in the first email) ================ Second, in EDKII, we have similar concept - we call trust region (TR): 1) Recovery Trust Region (SEC, PEI) 2) Main Trust Region (DXE-before EndOfDxe) 3) MM Trust Region (SMM) 4) Boot Trust Region (DXE w/o CSM-after EndOfDxe, BDS) 5) Legacy Trust Region (DXE with CSM-after EndOfDxe, BDS) 6) OS Trust Region (OS Boot, RT) We use term "transition" to describe the domain jump. We don't use term "isolation". We use "isolation" where two co-existed RT cannot tamper each other. For example, MM trust region and Boot Trust Region are isolated. Actually, the only isolation example we have in BIOS is x86 SMM or ARM TrustZone. ================ For example, SMM has much smaller exposure (external interface) than DXE. But SMM has much higher privilege than DXE by design. However, it does not make sense to move feature from DXE to SMM. All direction we have is to remove module from SMM to DXE, because the privilege concern, regardless of the exposure. The SEC and PEI are almost same from TR perspective, they are in recovery TR. Recovery TR has a little high privilege than Main TR, because there are some security LOCK activities happened in Recovery TR. For a specific case, PEI might be a better place. I agree. For example, flash lock. I will definitely vote to put it to PEI. But in general, I don't see why moving feature from PEI to DXE becomes a problem, unless you can explain in detail what the "problem" is. > Moving code to SEC has its problems too. SEC is a much more restricted > environment. A direct consequence is that you have re-invented > multiprocessor job scheduling (using tdx mailbox) instead of using > standard mp service for parallel accept. I do not account that as > "reducing complexity". [Jiewen] OK. Let me explain multiprocessor related topic. I don't think we intent to "reduce" complexity in this area. I would say, it is same with or without PEI. TDX (also SEV) has special requirement to *accept* memory, before use it. The accepting is time consuming process. So the motivation is to use multiprocessor to accelerate the process. We have at least three architecture places to accept the memory - SEC, PEI and DXE. A) Accept in SEC We need write multiprocessor code in SEC. This is already mandatory, even without accepting memory. The TDX architecture already changes the reset vector flow - ALL processors jumps to the reset vector at same time. Having multiprocessor code in SEC is unavoidable. We have to do it, to rendez-vous APs and initialize mailbox. The code is written because of TDX architecture change, not because memory accepting. So I won't treat it as a burden to add additional feature to memory accept in SEC. At same time, the benefic to accept memory in SEC is significant, you can have memory ready for use. I will explain later why that matters. B) Accept in PEI PEI has MP_PPI, that is TRUE. But the problem is: MP_PPI starts *later*. MP_PPI starts *after* the memory discovery (https://github.com/tianocore/edk2/blob/master/UefiCpuPkg/CpuMpPei/CpuMpPei.c#L706), which mean the process will be: Step 1: TdxPeim accepts PEI required memory without MP_PPI Step 2: PlatformPei installs PEI required memory. Step 3: MP_PPI starts. Step 4: TdxPeim accepts additional memory with MP_PPI Now, you can see the benefit to accept PEI memory is not there. NOTE: PEI memory is ~64M if GPAW is 48, it is ~98M if GPAW is 52, which is a big number. That mean, with the solution you proposed (use PEI MP_PPI), we will have trouble on boot performance. C) Accept in DXE Almost same as PEI. You have to accept some memory before launch MP_SERVICE. Same boot performance issue. As such, we conclude that doing memory accept in SEC is the best choice for TDX architecture. You may argue that we re-invented is MP work in SEC. Right, but this is done because of TDX architecture. It is NOT related to config-A (w/ PEI) or config-B (w/o PEI). > And I've not yet seen the other changes you > have done for pei-less tdvf ... [Jiewen] That is because there is no many change. :-) Here is just some early code. 1) Platform Init In config-B, it is at https://github.com/tianocore/edk2-staging/blob/TDVF/OvmfPkg/Library/TdvfPlatformLibQemu/Platform.c In config-A, it is at https://github.com/mxu9/edk2/blob/tdvf_wave2.v3/OvmfPkg/PlatformPei/Platform.c 2) DXE hand off In config-B, it is at https://github.com/tianocore/edk2-staging/blob/TDVF/OvmfPkg/Library/TdxStartupLib/DxeLoad.c In config-A, it is at https://github.com/tianocore/edk2/blob/master/MdeModulePkg/Core/DxeIplPeim/DxeLoad.c The reset should be almost same in from SEC/PEI perspective. I don't see a reason to carry whole PEI just to run platform init (~300 line of code). For config-B, Min is working on it and doing clean up. You are welcome to provide feedback after we send out patch for config-B. > take care, > Gerd > > > > > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84092): https://edk2.groups.io/g/devel/message/84092 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
Hi, > Please refer to PI specification 1.7A (https://uefi.org/sites/default/files/resources/PI_Spec_1_7_A_final_May1.pdf) > Section 2.1 - Introduction. > "Philosophically, the PEI phase is intended to be the thinnest amount > of code to achieve the ends listed above. As such, any more > sophisticated algorithms or processing should be deferred to the DXE > phase of execution." Well, that might have been the original intention. Reality is that a bunch of security-related stuff ended up in PEI too. Support for TPM, SMM, suspend. And I think the motivation for that is exactly what James described: Given that most communication with external entities happens in the DXE phase it is much harder to trick PEI code because there are simply less attack vectors available. > I am not convinced that "exposure (external interface)" is a good > reason to decide where the module should be. Thinking of this, if you > prefer to put a module to the PEI because PEI has *less* exposure, > then PEI will have *more* exposure after that. If you move half of > DXE features to PEI, then PEI has same exposure as DXE. What benefit > we can get? Why is TPM and SMM and suspend support in PEI not DXE today? > > Moving code to SEC has its problems too. SEC is a much more restricted > > environment. A direct consequence is that you have re-invented > > multiprocessor job scheduling (using tdx mailbox) instead of using > > standard mp service for parallel accept. I do not account that as > > "reducing complexity". > > [Jiewen] OK. Let me explain multiprocessor related topic. > > I don't think we intent to "reduce" complexity in this area. I would > say, it is same with or without PEI. > > TDX (also SEV) has special requirement to *accept* memory, before use > it. The accepting is time consuming process. So the motivation is to > use multiprocessor to accelerate the process. > > We have at least three architecture places to accept the memory - SEC, > PEI and DXE. Well, I want focus on how all this will look like long-term, i.e. once we have lazy accept implemented. I don't think it makes sense to put much effort into optimizing a workflow which will only be temporary anyway. The lazy accept concept pretty much implies that the vast majority of memory will either be accepted in the DXE phase, or will not be accepted by the firmware at all and the linux kernel will handle that instead. I expect in the future SEC and/or PEI will accept barely enough memory that the firmware is able to enter the DXE phase and initialize core memory management. Then lazy accept takes over. > A) Accept in SEC > We need write multiprocessor code in SEC. This is already mandatory, > even without accepting memory. The TDX architecture already changes > the reset vector flow - ALL processors jumps to the reset vector at > same time. Having multiprocessor code in SEC is unavoidable. We have > to do it, to rendez-vous APs and initialize mailbox. Sure. > The code is written because of TDX architecture change, not because > memory accepting. So I won't treat it as a burden to add additional > feature to memory accept in SEC. That is the point where you start re-inventing the wheel though. You add logic to distribute memory acceptance jobs to the APs. I'd suggest to add full MP service support to TDX (probably also using the mailbox), then use MP service to distribute memory acceptance jobs to the APs. I think you will need that anyway for lazy accept, to do parallel accept in DXE phase. > B) Accept in PEI > PEI has MP_PPI, that is TRUE. But the problem is: MP_PPI starts *later*. > MP_PPI starts *after* the memory discovery (https://github.com/tianocore/edk2/blob/master/UefiCpuPkg/CpuMpPei/CpuMpPei.c#L706), which mean the process will be: > Step 1: TdxPeim accepts PEI required memory without MP_PPI > Step 2: PlatformPei installs PEI required memory. > Step 3: MP_PPI starts. > Step 4: TdxPeim accepts additional memory with MP_PPI Yes. Or just don't initialize MP in PEI and do that in DXE only. Lazy accept will need that anyway. > Now, you can see the benefit to accept PEI memory is not there. > NOTE: PEI memory is ~64M if GPAW is 48, it is ~98M if GPAW is 52, which is a big number. What is all this memory needed for? Why do you need 32M additional memory for 5-level paging? Is that page table memory? If so, how does lazy accept change the picture? > As such, we conclude that doing memory accept in SEC is the best > choice for TDX architecture. Not convinced this is still the case with lazy accept on the horizon. > For config-B, Min is working on it and doing clean up. > You are welcome to provide feedback after we send out patch for config-B. Will do for sure. take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84203): https://edk2.groups.io/g/devel/message/84203 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
> -----Original Message----- > From: kraxel@redhat.com <kraxel@redhat.com> > Sent: Wednesday, December 1, 2021 9:55 PM > To: Yao, Jiewen <jiewen.yao@intel.com> > Cc: devel@edk2.groups.io; jejb@linux.ibm.com; Xu, Min M > <min.m.xu@intel.com>; Ard Biesheuvel <ardb+tianocore@kernel.org>; Justen, > Jordan L <jordan.l.justen@intel.com>; Brijesh Singh <brijesh.singh@amd.com>; > Erdem Aktas <erdemaktas@google.com>; Tom Lendacky > <thomas.lendacky@amd.com> > Subject: Re: [edk2-devel] [PATCH V3 15/29] OvmfPkg: Update SecEntry.nasm to > support Tdx > > Hi, > > > Please refer to PI specification 1.7A > (https://uefi.org/sites/default/files/resources/PI_Spec_1_7_A_final_May1.pdf) > > Section 2.1 - Introduction. > > > "Philosophically, the PEI phase is intended to be the thinnest amount > > of code to achieve the ends listed above. As such, any more > > sophisticated algorithms or processing should be deferred to the DXE > > phase of execution." > > Well, that might have been the original intention. Reality is that a > bunch of security-related stuff ended up in PEI too. Support for TPM, > SMM, suspend. And I think the motivation for that is exactly what James > described: Given that most communication with external entities happens > in the DXE phase it is much harder to trick PEI code because there are > simply less attack vectors available. [Jiewen] Again, I feel lost. Would you please clarify what is your purpose for this discussion? Yes, Security related stuff in PEI, that is not a problem. For example, we moved flash lock from DXE to PEI. (I already describe that in my previous email.) The key is *privilege*. If you don't need PEI privilege, you don't need move to PEI. 1) For "TPM" feature. In history, the Root-of-trust for measurement (RTM) belongs to Recovery TR. That is why it is in PEI naturally. 2) "SMM" support is in DXE today. What do you mean SMM support in PEI ? 3) "Suspend" is S3 *feature*, it is clearly documented in PI spec. In S3, DXE does not run. You have to put Suspend code in PEI. Yes, you give some examples. But I don't see how the examples support your statement on "exposure". IMHO, they are totally unrelated. > > I am not convinced that "exposure (external interface)" is a good > > reason to decide where the module should be. Thinking of this, if you > > prefer to put a module to the PEI because PEI has *less* exposure, > > then PEI will have *more* exposure after that. If you move half of > > DXE features to PEI, then PEI has same exposure as DXE. What benefit > > we can get? > > Why is TPM and SMM and suspend support in PEI not DXE today? [Jiewen] See above. > > > > Moving code to SEC has its problems too. SEC is a much more restricted > > > environment. A direct consequence is that you have re-invented > > > multiprocessor job scheduling (using tdx mailbox) instead of using > > > standard mp service for parallel accept. I do not account that as > > > "reducing complexity". > > > > [Jiewen] OK. Let me explain multiprocessor related topic. > > > > I don't think we intent to "reduce" complexity in this area. I would > > say, it is same with or without PEI. > > > > TDX (also SEV) has special requirement to *accept* memory, before use > > it. The accepting is time consuming process. So the motivation is to > > use multiprocessor to accelerate the process. > > > > We have at least three architecture places to accept the memory - SEC, > > PEI and DXE. > > Well, I want focus on how all this will look like long-term, i.e. once > we have lazy accept implemented. I don't think it makes sense to put > much effort into optimizing a workflow which will only be temporary > anyway. [Jiewen] This is the long-term solution. Lazy accept and MP accept are two required feature. "Lazy accept" just mean you can do things later, but you still need do it. "MP accept" means you can do things much quicker. I don't think we can remove MP accept just because we have lazy accept. > The lazy accept concept pretty much implies that the vast majority of > memory will either be accepted in the DXE phase, or will not be accepted > by the firmware at all and the linux kernel will handle that instead. > > I expect in the future SEC and/or PEI will accept barely enough memory > that the firmware is able to enter the DXE phase and initialize core > memory management. Then lazy accept takes over. [Jiewen] the "barely enough memory" is 64M and it takes long time to accept if you don't use MP. That is how SEC/PEI/DXE designed today. > > A) Accept in SEC > > We need write multiprocessor code in SEC. This is already mandatory, > > even without accepting memory. The TDX architecture already changes > > the reset vector flow - ALL processors jumps to the reset vector at > > same time. Having multiprocessor code in SEC is unavoidable. We have > > to do it, to rendez-vous APs and initialize mailbox. > > Sure. > > > The code is written because of TDX architecture change, not because > > memory accepting. So I won't treat it as a burden to add additional > > feature to memory accept in SEC. > > That is the point where you start re-inventing the wheel though. > You add logic to distribute memory acceptance jobs to the APs. > I'd suggest to add full MP service support to TDX (probably also using > the mailbox), then use MP service to distribute memory acceptance jobs > to the APs. I think you will need that anyway for lazy accept, to do > parallel accept in DXE phase. [Jiewen] I think I have stated it clearly that this is due to TDX architecture, we have to rendezvous all APs in SEC. So the MP code SEC is unavoidable. We have to reinvent the wheel in some way. > > > B) Accept in PEI > > PEI has MP_PPI, that is TRUE. But the problem is: MP_PPI starts *later*. > > MP_PPI starts *after* the memory discovery > (https://github.com/tianocore/edk2/blob/master/UefiCpuPkg/CpuMpPei/CpuM > pPei.c#L706), which mean the process will be: > > Step 1: TdxPeim accepts PEI required memory without MP_PPI > > Step 2: PlatformPei installs PEI required memory. > > Step 3: MP_PPI starts. > > Step 4: TdxPeim accepts additional memory with MP_PPI > > Yes. Or just don't initialize MP in PEI and do that in DXE only. > Lazy accept will need that anyway. > > > Now, you can see the benefit to accept PEI memory is not there. > > NOTE: PEI memory is ~64M if GPAW is 48, it is ~98M if GPAW is 52, which is a > big number. > > What is all this memory needed for? [Jiewen] These are initial memory for PEI Core and DXE Core to initialize the content. If you don't have initial memory, the core will fail. > Why do you need 32M additional memory for 5-level paging? > Is that page table memory? If so, how does lazy accept change the > picture? > > > As such, we conclude that doing memory accept in SEC is the best > > choice for TDX architecture. > > Not convinced this is still the case with lazy accept on the horizon. [Jiewen] I think MP accept is irrelevant to lazy accept, as I explained before. Having "lazy accept" != you can drop "MP accept" in SEC, because SEC is to "accept initial memory" before you can do lazy. > > > For config-B, Min is working on it and doing clean up. > > You are welcome to provide feedback after we send out patch for config-B. > > Will do for sure. > > take care, > Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84245): https://edk2.groups.io/g/devel/message/84245 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
Hi, > [Jiewen] Again, I feel lost. > > Would you please clarify what is your purpose for this discussion? > > Yes, Security related stuff in PEI, that is not a problem. For > example, we moved flash lock from DXE to PEI. (I already describe that > in my previous email.) Well, you tried to make the point that PEI shouldn't do anything beyond memory initialization. Which might have been correct for the initial design, but meanwhile it is not true any more. > The key is *privilege*. If you don't need PEI privilege, you don't need move to PEI. > 2) "SMM" support is in DXE today. What do you mean SMM support in PEI ? ovmf has a pei module for smm support (see OvmfPkg/SmmAccess/SmmAccessPei.inf). > But I don't see how the examples support your statement on "exposure". Well, lets stick to the flash lock example. Moving it from DXE to PEI makes it less exposed. It'll run before DXE starts processing externally controlled input (efi vars, kbd input, disk reads, option roms, ...). So trying trick it into not actually locking the flash is much harder. Or, to put it into another way, it reduces the attack surface for the code with higher privilege. (it's of course also need to make sure you can't unlock flash with a suspend-resume cycle). > > Well, I want focus on how all this will look like long-term, i.e. once > > we have lazy accept implemented. I don't think it makes sense to put > > much effort into optimizing a workflow which will only be temporary > > anyway. > > [Jiewen] This is the long-term solution. > Lazy accept and MP accept are two required feature. > > "Lazy accept" just mean you can do things later, but you still need do it. > "MP accept" means you can do things much quicker. > > I don't think we can remove MP accept just because we have lazy accept. I don't want remove it. But with lazy accept you have more options to implement it. No need to go for assembler code in SEC, you can use MP service with standard C code in PEI or DXE. > [Jiewen] the "barely enough memory" is 64M and it takes long time to > accept if you don't use MP. If I remember the numbers correctly (roughly 4 seconds for 2G on a single processor) that "long time" would be roughly 12 ms for 64M. > > That is the point where you start re-inventing the wheel though. > > You add logic to distribute memory acceptance jobs to the APs. > > I'd suggest to add full MP service support to TDX (probably also using > > the mailbox), then use MP service to distribute memory acceptance jobs > > to the APs. I think you will need that anyway for lazy accept, to do > > parallel accept in DXE phase. > > [Jiewen] I think I have stated it clearly that this is due to TDX > architecture, we have to rendezvous all APs in SEC. So the MP code > SEC is unavoidable. We have to reinvent the wheel in some way. Well, adding TDX rendezvous support isn't re-inventing the wheel, you surely need that, no question. But then you go create your own job scheduler (also accept job implementation), all in assembler, instead of using standard edk2 MP services with more readable C code. *That* is where you re-invent the wheel. > > > Now, you can see the benefit to accept PEI memory is not there. > > > NOTE: PEI memory is ~64M if GPAW is 48, it is ~98M if GPAW is 52, which is a > > big number. > > > > What is all this memory needed for? > > [Jiewen] These are initial memory for PEI Core and DXE Core to initialize the content. > If you don't have initial memory, the core will fail. Where does the 50% increase for GPAW=52 comes from? take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84409): https://edk2.groups.io/g/devel/message/84409 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
> -----Original Message----- > From: kraxel@redhat.com <kraxel@redhat.com> > Sent: Monday, December 6, 2021 10:58 PM > To: Yao, Jiewen <jiewen.yao@intel.com> > Cc: devel@edk2.groups.io; jejb@linux.ibm.com; Xu, Min M > <min.m.xu@intel.com>; Ard Biesheuvel <ardb+tianocore@kernel.org>; Justen, > Jordan L <jordan.l.justen@intel.com>; Brijesh Singh <brijesh.singh@amd.com>; > Erdem Aktas <erdemaktas@google.com>; Tom Lendacky > <thomas.lendacky@amd.com> > Subject: Re: [edk2-devel] [PATCH V3 15/29] OvmfPkg: Update SecEntry.nasm to > support Tdx > > Hi, > > > [Jiewen] Again, I feel lost. > > > > Would you please clarify what is your purpose for this discussion? > > > > Yes, Security related stuff in PEI, that is not a problem. For > > example, we moved flash lock from DXE to PEI. (I already describe that > > in my previous email.) > > Well, you tried to make the point that PEI shouldn't do anything beyond > memory initialization. Which might have been correct for the initial > design, but meanwhile it is not true any more. [Jiewen] No, I am not making this point. My point is : It is legal to move a feature from PEI to DXE. My understanding is that: you are making point - "It is problematic that moving a feature from PEI to DXE". I take "feature" as "any generic feature". I accept the statement that "It *may* be problematic that moving a *specific (such as security)* feature from PEI to DXE". But I disagree the general statement that moving any feature from PEI to DXE will cause problem. For PEI, my point is 1) According to PI spec, PEI is minimal so it is legal to move feature from PEI to DXE. 2) We do see some examples that we move from DXE to PEI, but that is case by case. It does not mean you cannot move feature from PEI to DXE. > > > The key is *privilege*. If you don't need PEI privilege, you don't need move to > PEI. > > > 2) "SMM" support is in DXE today. What do you mean SMM support in PEI ? > > ovmf has a pei module for smm support (see > OvmfPkg/SmmAccess/SmmAccessPei.inf). [Jiewen] The SmmAccessPei is for S3 resume. It is already documented to PI spec. I don't see anything wrong here. > > But I don't see how the examples support your statement on "exposure". > > Well, lets stick to the flash lock example. Moving it from DXE to PEI > makes it less exposed. It'll run before DXE starts processing > externally controlled input (efi vars, kbd input, disk reads, option > roms, ...). So trying trick it into not actually locking the flash is > much harder. [Jiewen] I am tired on using word "exposure". You gave the definition that "exposure == external interface". And your statement is that we should move "feature" to the component with less exposure. - I take "feature" as "any generic feature". In my opinion, less exposure != high privilege. There might be some relationship. A high privilege module may have less exposure usually. However, the privilege is *cause*, less exposure is *consequence*. Not verse versa. We made judgement based upon privilege, not the exposure. I accept the statement that "We should move a *security* feature to component with *high privilege*". But I disagree the general statement that we should moving any feature to less exposure. I give a counter example - SMM. SMM has less external interface, but we cannot move "any generic feature" to SMM. > > Or, to put it into another way, it reduces the attack surface for the > code with higher privilege. > > (it's of course also need to make sure you can't unlock flash with a > suspend-resume cycle). > > > > Well, I want focus on how all this will look like long-term, i.e. once > > > we have lazy accept implemented. I don't think it makes sense to put > > > much effort into optimizing a workflow which will only be temporary > > > anyway. > > > > [Jiewen] This is the long-term solution. > > Lazy accept and MP accept are two required feature. > > > > "Lazy accept" just mean you can do things later, but you still need do it. > > "MP accept" means you can do things much quicker. > > > > I don't think we can remove MP accept just because we have lazy accept. > > I don't want remove it. But with lazy accept you have more options to > implement it. No need to go for assembler code in SEC, you can use MP > service with standard C code in PEI or DXE. > > > [Jiewen] the "barely enough memory" is 64M and it takes long time to > > accept if you don't use MP. > > If I remember the numbers correctly (roughly 4 seconds for 2G on a > single processor) that "long time" would be roughly 12 ms for 64M. [Jiewen] OK, I talked with Min again. 12ms is not right data today. We have bigger number, but I cannot share the data according to legal reason. But I agree with your statement that, if the data is small enough, then we don't need MP in sec. I propose this way: 1) In first patch, we drop MP in SEC. 2) We can revisit if it is really needed later, when the TDX platform is about to launch. I believe we will have more precise data at that moment. > > > > That is the point where you start re-inventing the wheel though. > > > You add logic to distribute memory acceptance jobs to the APs. > > > I'd suggest to add full MP service support to TDX (probably also using > > > the mailbox), then use MP service to distribute memory acceptance jobs > > > to the APs. I think you will need that anyway for lazy accept, to do > > > parallel accept in DXE phase. > > > > > [Jiewen] I think I have stated it clearly that this is due to TDX > > architecture, we have to rendezvous all APs in SEC. So the MP code > > SEC is unavoidable. We have to reinvent the wheel in some way. > > Well, adding TDX rendezvous support isn't re-inventing the wheel, you > surely need that, no question. > > But then you go create your own job scheduler (also accept job > implementation), all in assembler, instead of using standard edk2 MP > services with more readable C code. *That* is where you re-invent the > wheel. > > > > > Now, you can see the benefit to accept PEI memory is not there. > > > > NOTE: PEI memory is ~64M if GPAW is 48, it is ~98M if GPAW is 52, which is > a > > > big number. > > > > > > What is all this memory needed for? > > > > [Jiewen] These are initial memory for PEI Core and DXE Core to initialize the > content. > > If you don't have initial memory, the core will fail. > > Where does the 50% increase for GPAW=52 comes from? [Jiewen] Yes, this is about page table. The reason is that UEFI spec requires you to map all memory. You have to create page table for all. > > take care, > Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84438): https://edk2.groups.io/g/devel/message/84438 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
Hi,
> [Jiewen] OK, I talked with Min again. 12ms is not right data today.
> We have bigger number, but I cannot share the data according to legal reason.
>
> But I agree with your statement that, if the data is small enough, then we don't need MP in sec.
>
> I propose this way:
> 1) In first patch, we drop MP in SEC.
Yes. Next implement lazy accept ...
> 2) We can revisit if it is really needed later, when the TDX platform is about to launch.
... then revisit where we stand in terms of boot performance.
And, yes, doing that on the final tdx platform hardware instead
of preliminary development hardware makes sense too.
> > Where does the 50% increase for GPAW=52 comes from?
>
> [Jiewen] Yes, this is about page table.
> The reason is that UEFI spec requires you to map all memory. You have to create page table for all.
Seems that has changed with the latest (2.9) revision of the specs which
explicitly excludes unaccepted memory. From section 2.3.4:
Paging mode is enabled and any memory space defined by the UEFI
memory map is identity mapped (virtual address equals physical
address), although the attributes of certain regions may not have
all read, write, and execute attributes or be unmarked for purposes
of platform protection. The mappings to other regions, such as those
for unaccepted memory, are undefined and may vary from
implementation to implementation.
So implementing lazy accept should bring the initial memory footprint
down because page tables for unaccepted memory are not needed in
SEC/PEI. We can lazily allocate them in DXE instead when accepting
memory (either all memory, or just enough to load the linux kernel and
have linux accept the remaining memory).
take care,
Gerd
-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#84448): https://edk2.groups.io/g/devel/message/84448
Mute This Topic: https://groups.io/mt/86739864/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-
On December 7, 2021 4:05 PM, Gerd Hoffmann wrote: > > [Jiewen] OK, I talked with Min again. 12ms is not right data today. > > We have bigger number, but I cannot share the data according to legal > reason. > > > > But I agree with your statement that, if the data is small enough, then we > don't need MP in sec. > > > > I propose this way: > > 1) In first patch, we drop MP in SEC. > > Yes. Next implement lazy accept ... > > > 2) We can revisit if it is really needed later, when the TDX platform is about > to launch. > > ... then revisit where we stand in terms of boot performance. > > And, yes, doing that on the final tdx platform hardware instead of preliminary > development hardware makes sense too. > > > > Where does the 50% increase for GPAW=52 comes from? > > > > [Jiewen] Yes, this is about page table. > > The reason is that UEFI spec requires you to map all memory. You have to > create page table for all. > > Seems that has changed with the latest (2.9) revision of the specs which > explicitly excludes unaccepted memory. From section 2.3.4: > > Paging mode is enabled and any memory space defined by the UEFI > memory map is identity mapped (virtual address equals physical > address), although the attributes of certain regions may not have > all read, write, and execute attributes or be unmarked for purposes > of platform protection. The mappings to other regions, such as those > for unaccepted memory, are undefined and may vary from > implementation to implementation. > > So implementing lazy accept should bring the initial memory footprint down > because page tables for unaccepted memory are not needed in SEC/PEI. We > can lazily allocate them in DXE instead when accepting memory (either all > memory, or just enough to load the linux kernel and have linux accept the > remaining memory). So as the first step I will submit the patch-set without MP in SEC. Lazy accept will be a separate patch-set after that. In the Lazy accept there are changes in multiply places, such as accept initial memory, publish pei memory, transfer to DXE hob list, DXE Core GCD services, Shell(memmap). Also we have to consider accept more memory in Pool/Page functions when OOM occurs. Thanks Min -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#84488): https://edk2.groups.io/g/devel/message/84488 Mute This Topic: https://groups.io/mt/86739864/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
© 2016 - 2026 Red Hat, Inc.