patchew
Login
During review of the TDX guests on Hyper-V patchset, Dave pointed out a potential race between changing a page's private/shared status and load_unaligned_zeropad(). Fix the issue. v2: - Add more info in commit message of the first patch. - Move enc_status_change_finish_noop() into a separate patch. - Fix typo in commit message and comment. Kirill A. Shutemov (3): x86/mm: Allow guest.enc_status_change_prepare() to fail x86/tdx: Fix race between set_memory_encrypted() and load_unaligned_zeropad() x86/mm: Fix enc_status_change_finish_noop() arch/x86/coco/tdx/tdx.c | 56 +++++++++++++++++++++++++++++++-- arch/x86/include/asm/x86_init.h | 2 +- arch/x86/kernel/x86_init.c | 4 +-- arch/x86/mm/mem_encrypt_amd.c | 4 ++- arch/x86/mm/pat/set_memory.c | 3 +- 5 files changed, 61 insertions(+), 8 deletions(-) -- 2.39.3
TDX code is going to provide guest.enc_status_change_prepare() that is able to fail. TDX will use the call to convert the GPA range from shared to private. This operation can fail. Add a way to return an error from the callback. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: stable@vger.kernel.org --- arch/x86/include/asm/x86_init.h | 2 +- arch/x86/kernel/x86_init.c | 2 +- arch/x86/mm/mem_encrypt_amd.c | 4 +++- arch/x86/mm/pat/set_memory.c | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index XXXXXXX..XXXXXXX 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -XXX,XX +XXX,XX @@ struct x86_init_acpi { * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status */ struct x86_guest { - void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); bool (*enc_tlb_flush_required)(bool enc); bool (*enc_cache_flush_required)(void); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index XXXXXXX..XXXXXXX 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -XXX,XX +XXX,XX @@ struct x86_cpuinit_ops x86_cpuinit = { static void default_nmi_init(void) { }; -static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { } +static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; } static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } static bool enc_tlb_flush_required_noop(bool enc) { return false; } static bool enc_cache_flush_required_noop(void) { return false; } diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index XXXXXXX..XXXXXXX 
100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -XXX,XX +XXX,XX @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) #endif } -static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) +static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) { /* * To maintain the security guarantees of SEV-SNP guests, make sure @@ -XXX,XX +XXX,XX @@ static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool */ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc) snp_set_memory_shared(vaddr, npages); + + return true; } /* Return true unconditionally: return value doesn't matter for the SEV side */ diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index XXXXXXX..XXXXXXX 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -XXX,XX +XXX,XX @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required()); /* Notify hypervisor that we are about to set/clr encryption attribute. */ - x86_platform.guest.enc_status_change_prepare(addr, numpages, enc); + if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc)) + return -EIO; ret = __change_page_attr_set_clr(&cpa, 1); -- 2.39.3
Touching privately mapped GPA that is not properly converted to private with MapGPA and accepted leads to an unrecoverable exit to VMM. load_unaligned_zeropad() can touch memory that is not owned by the caller, but just happens to be next after the owned memory. This load_unaligned_zeropad() behaviour makes it important when the kernel asks VMM to convert a GPA from shared to private or back. The kernel must never have a page mapped into direct mapping (and aliases) as private when the GPA is already converted to shared or when the GPA is not yet converted to private. guest.enc_status_change_prepare() is called before adjusting direct mapping and therefore it is responsible for converting the memory to private. guest.enc_status_change_finish() is called after adjusting direct mapping and it converts the memory to shared. It is okay to have a shared mapping of memory that is not converted properly. handle_mmio() knows how to deal with load_unaligned_zeropad() stepping on it. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Fixes: 7dbde7631629 ("x86/mm/cpa: Add support for TDX shared memory") Cc: stable@vger.kernel.org --- arch/x86/coco/tdx/tdx.c | 56 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index XXXXXXX..XXXXXXX 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -XXX,XX +XXX,XX @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc) return true; } +static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages, + bool enc) +{ + /* + * Only handle shared->private conversion here. + * See the comment in tdx_early_init(). + */ + if (enc) + return tdx_enc_status_changed(vaddr, numpages, enc); + return true; +} + +static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages, + bool enc) +{ + /* + * Only handle private->shared conversion here. + * See the comment in tdx_early_init(). 
+ */ + if (!enc) + return tdx_enc_status_changed(vaddr, numpages, enc); + return true; +} + void __init tdx_early_init(void) { u64 cc_mask; @@ -XXX,XX +XXX,XX @@ void __init tdx_early_init(void) */ physical_mask &= cc_mask - 1; - x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; - x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; - x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed; + /* + * Touching privately mapped GPA that is not properly converted to + * private with MapGPA and accepted leads to unrecoverable exit + * to VMM. + * + * load_unaligned_zeropad() can touch memory that is not owned by + * the caller, but just happened to next after the owned memory. + * This load_unaligned_zeropad() behaviour makes it important when + * kernel asks VMM to convert a GPA from shared to private or back. + * Kernel must never have a page mapped into direct mapping (and + * aliases) as private when the GPA is already converted to shared or + * when GPA is not yet converted to private. + * + * guest.enc_status_change_prepare() called before adjusting direct + * mapping and therefore it is responsible for converting the memory + * to private. + * + * guest.enc_status_change_finish() called after adjusting direct + * mapping and it converts the memory to shared. + * + * It is okay to have a shared mapping of memory that is not converted + * properly. handle_mmio() knows how to deal with load_unaligned_zeropad() + * stepping on it. + */ + x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare; + x86_platform.guest.enc_status_change_finish = tdx_enc_status_change_finish; + + x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; + x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; pr_info("Guest detected\n"); } -- 2.39.3
enc_status_change_finish_noop() is currently defined as always-fail, which doesn't make sense for a noop. Make it return true. The change doesn't have a user-visible effect because the callback only gets called if the platform has CC_ATTR_MEM_ENCRYPT, and all platforms with that attribute override the callback with their own implementation. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> --- arch/x86/kernel/x86_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index XXXXXXX..XXXXXXX 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -XXX,XX +XXX,XX @@ struct x86_cpuinit_ops x86_cpuinit = { static void default_nmi_init(void) { }; static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; } -static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } +static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; } static bool enc_tlb_flush_required_noop(bool enc) { return false; } static bool enc_cache_flush_required_noop(void) { return false; } static bool is_private_mmio_noop(u64 addr) {return false; } -- 2.39.3
During review of the TDX guests on Hyper-V patchset, Dave pointed out a potential race between changing a page's private/shared status and load_unaligned_zeropad(). Fix the issue. v3: - Fix grammar; - Add Sathya's Reviewed-bys; v2: - Add more info in commit message of the first patch. - Move enc_status_change_finish_noop() into a separate patch. - Fix typo in commit message and comment. Kirill A. Shutemov (3): x86/mm: Allow guest.enc_status_change_prepare() to fail x86/tdx: Fix race between set_memory_encrypted() and load_unaligned_zeropad() x86/mm: Fix enc_status_change_finish_noop() arch/x86/coco/tdx/tdx.c | 64 +++++++++++++++++++++++++++++++-- arch/x86/include/asm/x86_init.h | 2 +- arch/x86/kernel/x86_init.c | 4 +-- arch/x86/mm/mem_encrypt_amd.c | 4 ++- arch/x86/mm/pat/set_memory.c | 3 +- 5 files changed, 69 insertions(+), 8 deletions(-) -- 2.39.3
TDX code is going to provide guest.enc_status_change_prepare() that is able to fail. TDX will use the call to convert the GPA range from shared to private. This operation can fail. Add a way to return an error from the callback. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> --- arch/x86/include/asm/x86_init.h | 2 +- arch/x86/kernel/x86_init.c | 2 +- arch/x86/mm/mem_encrypt_amd.c | 4 +++- arch/x86/mm/pat/set_memory.c | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index XXXXXXX..XXXXXXX 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -XXX,XX +XXX,XX @@ struct x86_init_acpi { * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status */ struct x86_guest { - void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); bool (*enc_tlb_flush_required)(bool enc); bool (*enc_cache_flush_required)(void); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index XXXXXXX..XXXXXXX 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -XXX,XX +XXX,XX @@ struct x86_cpuinit_ops x86_cpuinit = { static void default_nmi_init(void) { }; -static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { } +static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; } static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } static bool enc_tlb_flush_required_noop(bool enc) { return false; } static bool enc_cache_flush_required_noop(void) { return false; } diff --git 
a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index XXXXXXX..XXXXXXX 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -XXX,XX +XXX,XX @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) #endif } -static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) +static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) { /* * To maintain the security guarantees of SEV-SNP guests, make sure @@ -XXX,XX +XXX,XX @@ static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool */ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc) snp_set_memory_shared(vaddr, npages); + + return true; } /* Return true unconditionally: return value doesn't matter for the SEV side */ diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index XXXXXXX..XXXXXXX 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -XXX,XX +XXX,XX @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required()); /* Notify hypervisor that we are about to set/clr encryption attribute. */ - x86_platform.guest.enc_status_change_prepare(addr, numpages, enc); + if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc)) + return -EIO; ret = __change_page_attr_set_clr(&cpa, 1); -- 2.39.3
Touching privately mapped GPA that is not properly converted to private with MapGPA and accepted leads to an unrecoverable exit to VMM. load_unaligned_zeropad() can touch memory that is not owned by the caller, but just happens to be next after the owned memory. This load_unaligned_zeropad() behavior makes it important when the kernel asks VMM to convert a GPA from shared to private or back. The kernel must never have a page mapped into direct mapping (and aliases) as private when the GPA is already converted to shared or when the GPA is not yet converted to private. guest.enc_status_change_prepare() is called before adjusting direct mapping and therefore is responsible for converting the memory to private. guest.enc_status_change_finish() is called after adjusting direct mapping and it converts the memory to shared. It is okay to have a shared mapping of memory that is not properly converted. handle_mmio() knows how to deal with load_unaligned_zeropad() stepping on it. Signed-off-by: Kirill A. 
Shutemov <kirill.shutemov@linux.intel.com> Fixes: 7dbde7631629 ("x86/mm/cpa: Add support for TDX shared memory") Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> --- arch/x86/coco/tdx/tdx.c | 64 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index XXXXXXX..XXXXXXX 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -XXX,XX +XXX,XX @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc) return true; } +static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages, + bool enc) +{ + /* + * Only handle shared->private conversion here. + * See the comment in tdx_early_init(). + */ + if (enc) + return tdx_enc_status_changed(vaddr, numpages, enc); + return true; +} + +static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages, + bool enc) +{ + /* + * Only handle private->shared conversion here. + * See the comment in tdx_early_init(). + */ + if (!enc) + return tdx_enc_status_changed(vaddr, numpages, enc); + return true; +} + void __init tdx_early_init(void) { u64 cc_mask; @@ -XXX,XX +XXX,XX @@ void __init tdx_early_init(void) */ physical_mask &= cc_mask - 1; - x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; - x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; - x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed; + /* + * Touching privately mapped GPA that is not properly converted to + * private with MapGPA and accepted leads to an unrecoverable exit + * to VMM. + * + * load_unaligned_zeropad() can touch memory that is not owned by the + * caller, but just happened to next after the owned memory. + * This load_unaligned_zeropad() behaviour makes it important when + * kernel asks VMM to convert a GPA from shared to private or back. 
+ * Kernel must never have a page mapped into direct mapping (and + * aliases) as private when the GPA is already converted to shared or + * when GPA is not yet converted to private. + * + * load_unaligned_zeropad() can touch memory that is not owned by the + * caller, but just happens to be next after the owned memory. This + * load_unaligned_zeropad() behavior makes it important when the kernel + * asks VMM to convert a GPA from shared to private or back. The kernel + * must never have a page mapped into direct mapping (and aliases) as + * private when the GPA is already converted to shared or when the GPA + * is not yet converted to private. + * + * guest.enc_status_change_prepare() is called before adjusting direct + * mapping and therefore is responsible for converting the memory to + * private. + * + * guest.enc_status_change_finish() is called after adjusting direct + * mapping and it converts the memory to shared. + * + * It is okay to have a shared mapping of memory that is not properly + * converted. handle_mmio() knows how to deal with + * load_unaligned_zeropad() stepping on it. + */ + x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare; + x86_platform.guest.enc_status_change_finish = tdx_enc_status_change_finish; + + x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; + x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; pr_info("Guest detected\n"); } -- 2.39.3
enc_status_change_finish_noop() is currently defined as always-fail, which doesn't make sense for a noop. Make it return true. The change has no user-visible effect because the callback is only called if the platform has CC_ATTR_MEM_ENCRYPT, and all platforms with that attribute override the callback with their own implementation. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> --- arch/x86/kernel/x86_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index XXXXXXX..XXXXXXX 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -XXX,XX +XXX,XX @@ struct x86_cpuinit_ops x86_cpuinit = { static void default_nmi_init(void) { }; static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; } -static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } +static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; } static bool enc_tlb_flush_required_noop(bool enc) { return false; } static bool enc_cache_flush_required_noop(void) { return false; } static bool is_private_mmio_noop(u64 addr) {return false; } -- 2.39.3