From nobody Sat Nov 30 01:31:35 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0E21A4F20E; Fri, 13 Sep 2024 17:06:22 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1726247183; cv=none; b=NuzcOODIGUqFQYuOKKQU/ocuJA1dR0eWWNgYp6qluUcI7sIUqTYzz7G1kbKnx8dfKY4MW5dXh8Aoi00u+hYn0MkxWMEYeN8Aa6o5hwjWxqYwGJi7sSeIgLg0BlbFO+HKGQ0hotNUA+eo/yV7Jvm2sCbL0F40tytpGOKClHtvBTI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1726247183; c=relaxed/simple; bh=c8xRrWQx8lzMHOGMOUi7KsdFCbyLJg7q+NaSRJ5jGDA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=KtejzbsSvJVRywXwcdY0yUtjeGz0woEXa59Ly34p68Pe6Ow1dXFDfZwnG9T4gepkup/uWJjmPJi26GFTDLejSUOcYPigyogoDw8coeyy7a4DsDrb8XKBoCR7bbf6KzjcarRwxDcTgmukXEI3z+jw33u8JdOEX7su0kGoj8s2XNI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=QlWqMocU; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="QlWqMocU" Received: by smtp.kernel.org (Postfix) with ESMTPSA id C19A2C4CED1; Fri, 13 Sep 2024 17:06:18 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1726247182; bh=c8xRrWQx8lzMHOGMOUi7KsdFCbyLJg7q+NaSRJ5jGDA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=QlWqMocUbx7Yiz1jgIeZidWfJhFyVe3PApc9UwJ0/kb1yAsdeebhac2XiBXZvXEj3 tNbyUEV5wLexLa2XeAGr1jryQAi/mRkzIif2l4lJ2hglUWO4lTZLEyqmbfF1h4v02+ skjVItoIKzNDiILzOcsE/QAaVeuQEhGIybWGbaFC1mwGfcT4vcO+GxRnAzbgHug0mP 3TG87882kCXim4qT09zRIuIEwULVlGOlMtlYhtud8La20VMQFwWiGYvlqODj5QkNbk bILJEzWvmNobogTWfJs1adztePzcTksLgjJfsiWm/6U18cBFP1xQcYAk1qrrQGWG/f sTnFXsBs7YjOg== From: Alexey Gladkov To: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev Cc: "Alexey Gladkov (Intel)" , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , "H. Peter Anvin" , "Kirill A. Shutemov" , Andrew Morton , Yuan Yao , Geert Uytterhoeven , Yuntao Wang , Kai Huang , Baoquan He , Oleg Nesterov , cho@microsoft.com, decui@microsoft.com, John.Starks@microsoft.com Subject: [PATCH v7 3/6] x86/tdx: Add validation of userspace MMIO instructions Date: Fri, 13 Sep 2024 19:05:58 +0200 Message-ID: <5d3787ea8947a41fb5a71c9e606e8e9a99e76feb.1726237595.git.legion@kernel.org> X-Mailer: git-send-email 2.46.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Alexey Gladkov (Intel)" Instructions from kernel space are considered trusted. If the MMIO instruction is from userspace it must be checked. For userspace instructions, it is need to check that the INSN has not changed at the time of #VE and before the execution of the instruction. Once the userspace instruction parsed is enforced that the address points to mapped memory of current process and that address does not point to private memory. After parsing the userspace instruction, it is necessary to ensure that: 1. the operation direction (read/write) corresponds to #VE info; 2. the address still points to mapped memory of current process; 3. the address does not point to private memory. Reviewed-by: Kirill A. Shutemov Signed-off-by: Alexey Gladkov (Intel) --- arch/x86/coco/tdx/tdx.c | 131 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 118 insertions(+), 13 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 008840ac1191..30651a5af180 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -406,6 +407,87 @@ static bool mmio_write(int size, unsigned long addr, u= nsigned long val) EPT_WRITE, addr, val); } =20 +static inline bool is_private_gpa(u64 gpa) +{ + return gpa =3D=3D cc_mkenc(gpa); +} + +static int get_phys_addr(unsigned long addr, phys_addr_t *phys_addr, bool = *writable) +{ + unsigned int level; + pgd_t *pgdp; + pte_t *ptep; + + /* + * Address validation only makes sense for a user process. The lock must + * be obtained before validation can begin. + */ + mmap_assert_locked(current->mm); + + pgdp =3D pgd_offset(current->mm, addr); + + if (!pgd_none(*pgdp)) { + ptep =3D lookup_address_in_pgd(pgdp, addr, &level); + if (ptep) { + unsigned long offset; + + if (!pte_decrypted(*ptep)) + return -EFAULT; + + offset =3D addr & ~page_level_mask(level); + *phys_addr =3D PFN_PHYS(pte_pfn(*ptep)); + *phys_addr |=3D offset; + + *writable =3D pte_write(*ptep); + + return 0; + } + } + + return -EFAULT; +} + +static int valid_vaddr(struct ve_info *ve, enum insn_mmio_type mmio, int s= ize, + unsigned long vaddr) +{ + phys_addr_t phys_addr; + bool writable =3D false; + + /* It's not fatal. This can happen due to swap out or page migration. */ + if (get_phys_addr(vaddr, &phys_addr, &writable) || (ve->gpa !=3D cc_mkdec= (phys_addr))) + return -EAGAIN; + + /* + * Re-check whether #VE info matches the instruction that was decoded. + * + * The ve->gpa was valid at the time ve_info was received. But this code + * executed with interrupts enabled, allowing tlb shootdown and therefore + * munmap() to be executed in the parallel thread. + * + * By the time MMIO emulation is performed, ve->gpa may be already + * unmapped from the process, the device it belongs to removed from + * system and something else could be plugged in its place. + */ + switch (mmio) { + case INSN_MMIO_WRITE: + case INSN_MMIO_WRITE_IMM: + if (!writable || !(ve->exit_qual & EPT_VIOLATION_ACC_WRITE)) + return -EFAULT; + break; + case INSN_MMIO_READ: + case INSN_MMIO_READ_ZERO_EXTEND: + case INSN_MMIO_READ_SIGN_EXTEND: + if (!(ve->exit_qual & EPT_VIOLATION_ACC_READ)) + return -EFAULT; + break; + default: + WARN_ONCE(1, "Unsupported mmio instruction: %d", mmio); + return -EINVAL; + } + + return 0; +} + static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, = int size, struct pt_regs *regs, struct ve_info *ve) { @@ -490,7 +572,7 @@ static int handle_mmio(struct pt_regs *regs, struct ve_= info *ve) enum insn_mmio_type mmio; struct insn insn =3D {}; unsigned long vaddr; - int size; + int size, ret; =20 /* Only in-kernel MMIO is supported */ if (WARN_ON_ONCE(user_mode(regs))) @@ -511,6 +593,17 @@ static int handle_mmio(struct pt_regs *regs, struct ve= _info *ve) return -EINVAL; } =20 + vaddr =3D (unsigned long)insn_get_addr_ref(&insn, regs); + + if (user_mode(regs)) { + if (mmap_read_lock_killable(current->mm)) + return -EINTR; + + ret =3D valid_vaddr(ve, mmio, size, vaddr); + if (ret) + goto unlock; + } + /* * Reject EPT violation #VEs that split pages. * @@ -520,30 +613,39 @@ static int handle_mmio(struct pt_regs *regs, struct v= e_info *ve) * * load_unaligned_zeropad() will recover using exception fixups. */ - vaddr =3D (unsigned long)insn_get_addr_ref(&insn, regs); - if (vaddr / PAGE_SIZE !=3D (vaddr + size - 1) / PAGE_SIZE) - return -EFAULT; + if (vaddr / PAGE_SIZE !=3D (vaddr + size - 1) / PAGE_SIZE) { + ret =3D -EFAULT; + goto unlock; + } =20 switch (mmio) { case INSN_MMIO_WRITE: case INSN_MMIO_WRITE_IMM: case INSN_MMIO_MOVS: - return handle_mmio_write(&insn, mmio, size, regs, ve); + ret =3D handle_mmio_write(&insn, mmio, size, regs, ve); + break; case INSN_MMIO_READ: case INSN_MMIO_READ_ZERO_EXTEND: case INSN_MMIO_READ_SIGN_EXTEND: - return handle_mmio_read(&insn, mmio, size, regs, ve); + ret =3D handle_mmio_read(&insn, mmio, size, regs, ve); + break; case INSN_MMIO_DECODE_FAILED: /* * MMIO was accessed with an instruction that could not be * decoded or handled properly. It was likely not using io.h * helpers or accessed MMIO accidentally. */ - return -EINVAL; + ret =3D -EINVAL; + break; default: WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?"); - return -EINVAL; + ret =3D -EINVAL; } +unlock: + if (user_mode(regs)) + mmap_read_unlock(current->mm); + + return ret; } =20 static bool handle_in(struct pt_regs *regs, int size, int port) @@ -687,11 +789,6 @@ static int virt_exception_user(struct pt_regs *regs, s= truct ve_info *ve) } } =20 -static inline bool is_private_gpa(u64 gpa) -{ - return gpa =3D=3D cc_mkenc(gpa); -} - /* * Handle the kernel #VE. * @@ -729,6 +826,14 @@ bool tdx_handle_virt_exception(struct pt_regs *regs, s= truct ve_info *ve) insn_len =3D virt_exception_user(regs, ve); else insn_len =3D virt_exception_kernel(regs, ve); + + /* + * A special case to return to userspace without increasing regs->ip + * to repeat the instruction once again. + */ + if (insn_len =3D=3D -EAGAIN) + return true; + if (insn_len < 0) return false; =20 --=20 2.46.0