From nobody Wed Feb 11 05:03:29 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1F09F1662F8; Wed, 21 Aug 2024 14:25:50 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1724250351; cv=none; b=d6X0U0LMtOX13/itFFBYy5T05BrNbeR9P+kpsdj7NXsjRyALdrp6yTLqMBc1oRmpU+kBf1kMfdjRsXBuyGf9OqB0Tf3Z2iI8zw8tCiTDSdIhCZfj1EWh0NEsPFNnxhkcHEUEO+ujrFRt/mkPsqTaXDDsWV3lIuMbg2nyP1ceJBw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1724250351; c=relaxed/simple; bh=LcvOWJq3NiMNQ09LRmg1nA98cK9QWpXUp7B2W6XPS+8=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=naGAcGZG5me4qveWkTD0NP2IQgIu23JHv6iEbEuqkGU38FacpnCxyS6OvdkTjXPMiejBwEWiZuC9Go290QM2WbJiJcYm6CExCwL81DvlE8eSR3B2C2CF4hccIA5zMUd1rdlBye8WZb4rukW9wcfUvQ/RgkZNQX5lsLZ1RAMPbFw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=GXue2ZCk; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="GXue2ZCk" Received: by smtp.kernel.org (Postfix) with ESMTPSA id A72DCC32786; Wed, 21 Aug 2024 14:25:46 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1724250350; bh=LcvOWJq3NiMNQ09LRmg1nA98cK9QWpXUp7B2W6XPS+8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=GXue2ZCkt6B4t7TeeL7ENCzdLmvEh/08CKBmbuYrwN7Q2RRFNzO43LSfzsMFO9uSC hXFNBeOK5iGt0x1dIKzZTqX6nrNtr2CS+B2Y5rLJhZssxl4wGR+mewG3dZpPbiws9b xz1vHxHCmF6if1U+tzSJXrPLPGmccHIEbWP+dZbWDiFxnDMn/9J64fn/bw1Ou1FkfA 1TnSyVFkdzcXaeevNDE+hCF7VFsjapSxAzGVRVW3qd8dgmqmo0Sp/ab1HsUIlED02n 
+8IoAI1+CvYn45Uw01kVPVZWTzJm6QtDFd7IjM/7J5goDM9hKpl0CCVePGPKMLAAUY yrI8HZQ38yShA== From: Alexey Gladkov To: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev Cc: "Alexey Gladkov (Intel)" , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , "H. Peter Anvin" , "Kirill A. Shutemov" , Andrew Morton , Yuan Yao , Geert Uytterhoeven , Yuntao Wang , Kai Huang , Baoquan He , Oleg Nesterov , cho@microsoft.com, decui@microsoft.com, John.Starks@microsoft.com Subject: [PATCH v4 1/6] x86/tdx: Split MMIO read and write operations Date: Wed, 21 Aug 2024 16:24:33 +0200 Message-ID: X-Mailer: git-send-email 2.46.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Alexey Gladkov (Intel)" To implement MMIO in userspace, additional memory checks need to be implemented. To avoid overly complicating the handle_mmio() function and to separate checks from actions, it would be better to split this function into two separate functions to handle read and write operations. Reviewed-by: Kirill A. 
Shutemov Signed-off-by: Alexey Gladkov (Intel) --- arch/x86/coco/tdx/tdx.c | 136 ++++++++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 53 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 078e2bac2553..af0b6c1cacf7 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -405,14 +405,91 @@ static bool mmio_write(int size, unsigned long addr, = unsigned long val) EPT_WRITE, addr, val); } =20 +static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, = int size, + struct pt_regs *regs, struct ve_info *ve) +{ + unsigned long *reg, val; + + switch (mmio) { + case INSN_MMIO_WRITE: + reg =3D insn_get_modrm_reg_ptr(insn, regs); + if (!reg) + return -EINVAL; + memcpy(&val, reg, size); + if (!mmio_write(size, ve->gpa, val)) + return -EIO; + return insn->length; + case INSN_MMIO_WRITE_IMM: + val =3D insn->immediate.value; + if (!mmio_write(size, ve->gpa, val)) + return -EIO; + return insn->length; + case INSN_MMIO_MOVS: + /* + * MMIO was accessed with an instruction that could not be + * decoded or handled properly. It was likely not using io.h + * helpers or accessed MMIO accidentally. + */ + return -EINVAL; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + return insn->length; +} + +static int handle_mmio_read(struct insn *insn, enum insn_mmio_type mmio, i= nt size, + struct pt_regs *regs, struct ve_info *ve) +{ + unsigned long *reg, val; + int extend_size; + u8 extend_val; + + reg =3D insn_get_modrm_reg_ptr(insn, regs); + if (!reg) + return -EINVAL; + + if (!mmio_read(size, ve->gpa, &val)) + return -EIO; + + extend_val =3D 0; + + switch (mmio) { + case INSN_MMIO_READ: + /* Zero-extend for 32-bit operation */ + extend_size =3D size =3D=3D 4 ? 
sizeof(*reg) : 0; + break; + case INSN_MMIO_READ_ZERO_EXTEND: + /* Zero extend based on operand size */ + extend_size =3D insn->opnd_bytes; + break; + case INSN_MMIO_READ_SIGN_EXTEND: + /* Sign extend based on operand size */ + extend_size =3D insn->opnd_bytes; + if (size =3D=3D 1 && val & BIT(7)) + extend_val =3D 0xFF; + else if (size > 1 && val & BIT(15)) + extend_val =3D 0xFF; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + if (extend_size) + memset(reg, extend_val, extend_size); + memcpy(reg, &val, size); + return insn->length; +} + static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) { - unsigned long *reg, val, vaddr; char buffer[MAX_INSN_SIZE]; enum insn_mmio_type mmio; struct insn insn =3D {}; - int size, extend_size; - u8 extend_val =3D 0; + unsigned long vaddr; + int size; =20 /* Only in-kernel MMIO is supported */ if (WARN_ON_ONCE(user_mode(regs))) @@ -428,12 +505,6 @@ static int handle_mmio(struct pt_regs *regs, struct ve= _info *ve) if (WARN_ON_ONCE(mmio =3D=3D INSN_MMIO_DECODE_FAILED)) return -EINVAL; =20 - if (mmio !=3D INSN_MMIO_WRITE_IMM && mmio !=3D INSN_MMIO_MOVS) { - reg =3D insn_get_modrm_reg_ptr(&insn, regs); - if (!reg) - return -EINVAL; - } - /* * Reject EPT violation #VEs that split pages. 
* @@ -447,24 +518,15 @@ static int handle_mmio(struct pt_regs *regs, struct v= e_info *ve) if (vaddr / PAGE_SIZE !=3D (vaddr + size - 1) / PAGE_SIZE) return -EFAULT; =20 - /* Handle writes first */ switch (mmio) { case INSN_MMIO_WRITE: - memcpy(&val, reg, size); - if (!mmio_write(size, ve->gpa, val)) - return -EIO; - return insn.length; case INSN_MMIO_WRITE_IMM: - val =3D insn.immediate.value; - if (!mmio_write(size, ve->gpa, val)) - return -EIO; - return insn.length; + case INSN_MMIO_MOVS: + return handle_mmio_write(&insn, mmio, size, regs, ve); case INSN_MMIO_READ: case INSN_MMIO_READ_ZERO_EXTEND: case INSN_MMIO_READ_SIGN_EXTEND: - /* Reads are handled below */ - break; - case INSN_MMIO_MOVS: + return handle_mmio_read(&insn, mmio, size, regs, ve); case INSN_MMIO_DECODE_FAILED: /* * MMIO was accessed with an instruction that could not be @@ -476,38 +538,6 @@ static int handle_mmio(struct pt_regs *regs, struct ve= _info *ve) WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?"); return -EINVAL; } - - /* Handle reads */ - if (!mmio_read(size, ve->gpa, &val)) - return -EIO; - - switch (mmio) { - case INSN_MMIO_READ: - /* Zero-extend for 32-bit operation */ - extend_size =3D size =3D=3D 4 ? 
sizeof(*reg) : 0; - break; - case INSN_MMIO_READ_ZERO_EXTEND: - /* Zero extend based on operand size */ - extend_size =3D insn.opnd_bytes; - break; - case INSN_MMIO_READ_SIGN_EXTEND: - /* Sign extend based on operand size */ - extend_size =3D insn.opnd_bytes; - if (size =3D=3D 1 && val & BIT(7)) - extend_val =3D 0xFF; - else if (size > 1 && val & BIT(15)) - extend_val =3D 0xFF; - break; - default: - /* All other cases has to be covered with the first switch() */ - WARN_ON_ONCE(1); - return -EINVAL; - } - - if (extend_size) - memset(reg, extend_val, extend_size); - memcpy(reg, &val, size); - return insn.length; } =20 static bool handle_in(struct pt_regs *regs, int size, int port) --=20 2.45.2 From nobody Wed Feb 11 05:03:29 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0AD0B1B1D4B; Wed, 21 Aug 2024 14:25:55 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1724250355; cv=none; b=p8yQe3QuZdU55qQNXdWfzN7JHxJUoHVHumTssNejuSjxYf5Mit8Zuj6PZlFMW35VMzMmtwI4p4/9YoeEJbUDIgw4h6LCWJLfbHcczSCVm+neqZphSOKgbxbYBege+XgHJQPRXgVoRB7kiDpjO3vYQElklC2l56HddfYbJt8t4ss= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1724250355; c=relaxed/simple; bh=OdlbkrAkcDdFeYff2TkNkbf5A4blEnc3TOjwgl8wwa0=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=KL+75/gUDoMCq/6OOebQ/VjNC2YsGWftX0bsLszrIe0XbZinTGxitlt7ZyiiviVZB2Bceea0qjbToiLJW/aVsyhVn97Z/UYdC5fb7rIZdJ2LGOiCb1IOcsoHiGlBZUQKmxCVdHtkOXmBt4oqD2nr+RGJ4SOvJvpcm7WcokVCLtM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=TEvWmDNM; arc=none smtp.client-ip=10.30.226.201 
Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="TEvWmDNM" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 09939C4AF09; Wed, 21 Aug 2024 14:25:50 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1724250354; bh=OdlbkrAkcDdFeYff2TkNkbf5A4blEnc3TOjwgl8wwa0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=TEvWmDNMa7/TeHd+eDAr7GVUNZdnP48n3WDIvHYhAtnMnRjS833kLBogMPdTOWWlh 9zh9Wog+ikb7fjyJ5R9CvZLVbelM/2MbYmfgoxeifkwnySVAe2QQ+SYkDUapeZlQLr LhwRiq0M27CXfRr0hWA1hyAnZL0YjfUnn3dUvl1g46hqHOOhFcNoEsgOzyJkoJSe22 obbJl71bmwvLDxMu/LrYcFcnNb62dxHobK6WM2BbvokJfTn+94pjq5XZicXOvyRYSf xdFwWJkwTOs08eUCO7CCahuwE1O6E45ikTJSssB8RnUvoSiv69YmdwMC0iUjJicjV9 1zw2P/iS3Lw1A== From: Alexey Gladkov To: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev Cc: "Alexey Gladkov (Intel)" , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , "H. Peter Anvin" , "Kirill A. Shutemov" , Andrew Morton , Yuan Yao , Geert Uytterhoeven , Yuntao Wang , Kai Huang , Baoquan He , Oleg Nesterov , cho@microsoft.com, decui@microsoft.com, John.Starks@microsoft.com Subject: [PATCH v4 2/6] x86/tdx: Add validation of userspace MMIO instructions Date: Wed, 21 Aug 2024 16:24:34 +0200 Message-ID: X-Mailer: git-send-email 2.46.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Alexey Gladkov (Intel)" Instructions from kernel space are considered trusted. If the MMIO instruction is from userspace it must be checked. For userspace instructions, it is necessary to check that the INSN has not changed at the time of #VE and before the execution of the instruction. 
Once the userspace instruction is parsed, it is enforced that the address points to mapped memory of the current process and that the address does not point to private memory. After parsing the userspace instruction, it is necessary to ensure that: 1. the operation direction (read/write) corresponds to #VE info; 2. the address still points to mapped memory of current process; 3. the address does not point to private memory. Signed-off-by: Alexey Gladkov (Intel) Reviewed-by: Kirill A. Shutemov --- arch/x86/coco/tdx/tdx.c | 131 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 118 insertions(+), 13 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index af0b6c1cacf7..99634e12f9a7 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -405,6 +406,87 @@ static bool mmio_write(int size, unsigned long addr, u= nsigned long val) EPT_WRITE, addr, val); } =20 +static inline bool is_private_gpa(u64 gpa) +{ + return gpa =3D=3D cc_mkenc(gpa); +} + +static int get_phys_addr(unsigned long addr, phys_addr_t *phys_addr, bool = *writable) +{ + unsigned int level; + pgd_t *pgdp; + pte_t *ptep; + + /* + * Address validation only makes sense for a user process. The lock must + * be obtained before validation can begin. + */ + mmap_assert_locked(current->mm); + + pgdp =3D pgd_offset(current->mm, addr); + + if (!pgd_none(*pgdp)) { + ptep =3D lookup_address_in_pgd(pgdp, addr, &level); + if (ptep) { + unsigned long offset; + + if (!pte_decrypted(*ptep)) + return -EFAULT; + + offset =3D addr & ~page_level_mask(level); + *phys_addr =3D PFN_PHYS(pte_pfn(*ptep)); + *phys_addr |=3D offset; + + *writable =3D pte_write(*ptep); + + return 0; + } + } + + return -EFAULT; +} + +static int valid_vaddr(struct ve_info *ve, enum insn_mmio_type mmio, int s= ize, + unsigned long vaddr) +{ + phys_addr_t phys_addr; + bool writable =3D false; + + /* It's not fatal. 
This can happen due to swap out or page migration. */ + if (get_phys_addr(vaddr, &phys_addr, &writable) || (ve->gpa !=3D cc_mkdec= (phys_addr))) + return -EAGAIN; + + /* + * Re-check whether #VE info matches the instruction that was decoded. + * + * The ve->gpa was valid at the time ve_info was received. But this code + * executed with interrupts enabled, allowing tlb shootdown and therefore + * munmap() to be executed in the parallel thread. + * + * By the time MMIO emulation is performed, ve->gpa may be already + * unmapped from the process, the device it belongs to removed from + * system and something else could be plugged in its place. + */ + switch (mmio) { + case INSN_MMIO_WRITE: + case INSN_MMIO_WRITE_IMM: + if (!writable || !(ve->exit_qual & EPT_VIOLATION_ACC_WRITE)) + return -EFAULT; + break; + case INSN_MMIO_READ: + case INSN_MMIO_READ_ZERO_EXTEND: + case INSN_MMIO_READ_SIGN_EXTEND: + if (!(ve->exit_qual & EPT_VIOLATION_ACC_READ)) + return -EFAULT; + break; + default: + WARN_ONCE(1, "Unsupported mmio instruction: %d", mmio); + return -EINVAL; + } + + return 0; +} + static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, = int size, struct pt_regs *regs, struct ve_info *ve) { @@ -489,7 +571,7 @@ static int handle_mmio(struct pt_regs *regs, struct ve_= info *ve) enum insn_mmio_type mmio; struct insn insn =3D {}; unsigned long vaddr; - int size; + int size, ret; =20 /* Only in-kernel MMIO is supported */ if (WARN_ON_ONCE(user_mode(regs))) @@ -505,6 +587,17 @@ static int handle_mmio(struct pt_regs *regs, struct ve= _info *ve) if (WARN_ON_ONCE(mmio =3D=3D INSN_MMIO_DECODE_FAILED)) return -EINVAL; =20 + vaddr =3D (unsigned long)insn_get_addr_ref(&insn, regs); + + if (user_mode(regs)) { + if (mmap_read_lock_killable(current->mm)) + return -EINTR; + + ret =3D valid_vaddr(ve, mmio, size, vaddr); + if (ret) + goto unlock; + } + /* * Reject EPT violation #VEs that split pages. 
* @@ -514,30 +607,39 @@ static int handle_mmio(struct pt_regs *regs, struct v= e_info *ve) * * load_unaligned_zeropad() will recover using exception fixups. */ - vaddr =3D (unsigned long)insn_get_addr_ref(&insn, regs); - if (vaddr / PAGE_SIZE !=3D (vaddr + size - 1) / PAGE_SIZE) - return -EFAULT; + if (vaddr / PAGE_SIZE !=3D (vaddr + size - 1) / PAGE_SIZE) { + ret =3D -EFAULT; + goto unlock; + } =20 switch (mmio) { case INSN_MMIO_WRITE: case INSN_MMIO_WRITE_IMM: case INSN_MMIO_MOVS: - return handle_mmio_write(&insn, mmio, size, regs, ve); + ret =3D handle_mmio_write(&insn, mmio, size, regs, ve); + break; case INSN_MMIO_READ: case INSN_MMIO_READ_ZERO_EXTEND: case INSN_MMIO_READ_SIGN_EXTEND: - return handle_mmio_read(&insn, mmio, size, regs, ve); + ret =3D handle_mmio_read(&insn, mmio, size, regs, ve); + break; case INSN_MMIO_DECODE_FAILED: /* * MMIO was accessed with an instruction that could not be * decoded or handled properly. It was likely not using io.h * helpers or accessed MMIO accidentally. */ - return -EINVAL; + ret =3D -EINVAL; + break; default: WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?"); - return -EINVAL; + ret =3D -EINVAL; } +unlock: + if (user_mode(regs)) + mmap_read_unlock(current->mm); + + return ret; } =20 static bool handle_in(struct pt_regs *regs, int size, int port) @@ -681,11 +783,6 @@ static int virt_exception_user(struct pt_regs *regs, s= truct ve_info *ve) } } =20 -static inline bool is_private_gpa(u64 gpa) -{ - return gpa =3D=3D cc_mkenc(gpa); -} - /* * Handle the kernel #VE. * @@ -723,6 +820,14 @@ bool tdx_handle_virt_exception(struct pt_regs *regs, s= truct ve_info *ve) insn_len =3D virt_exception_user(regs, ve); else insn_len =3D virt_exception_kernel(regs, ve); + + /* + * A special case to return to userspace without increasing regs->ip + * to repeat the instruction once again. 
+ */ + if (insn_len =3D=3D -EAGAIN) + return true; + if (insn_len < 0) return false; =20 --=20 2.45.2 From nobody Wed Feb 11 05:03:29 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B61E81B1D72; Wed, 21 Aug 2024 14:25:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1724250359; cv=none; b=KDZnyK3GvIZN+xl+pkIqaKp0G4X0lIGzt8LbRXDXvQyD6L/42wQ3DMRBgHzW9jLnhG5DijYt17V/1ZY6wTAd7rt/jmWrxzXuxePMKSulgJSqYH5LGMAiFfZeHRT6c24JIT7jloAri/GIl8hzB2eYKgyy8qMDCr7FchNcLkfCH2I= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1724250359; c=relaxed/simple; bh=VaLKxFNPhXvmxXUd47RV57VILsjFoGbx9brLSVYXouI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=ZG9QS3FPflINn6SMNm7swXDVCPXm/s/tRr/ry5MvFH7XIQMITYFFnTxndoBykgfwbAYLRa/pkwCmdXn6uEbeRBTdAzZWxhk9hL8FElVMqP7YHAeWQnmhO2ol3+58oaay7rjNLq0ZzXwLa8adkQpAI8o+NJE/jAa5iDmKHoZ581o= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=CsZVnKF8; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="CsZVnKF8" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5C2CDC32786; Wed, 21 Aug 2024 14:25:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1724250359; bh=VaLKxFNPhXvmxXUd47RV57VILsjFoGbx9brLSVYXouI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=CsZVnKF8rqFsoiX6mWmS69pxHFoCtD4TROgyZMxvjoA5uKkuQbog/3NbQOQ6vry6y iIqHKnDj5iXmFw3wIy6VvP07/D/yBxIHvijUp+3A6EFdbIWS5LpIZfwYV6pNoW/cV+ 
UqH4IBwzwosw+OOAXqH5mWJ71tfWxPWJml070WwINL5SQKS8eAeVU6PV40HzDFsAiT i6BqHhiHR+cnijEMqt+2JQZnN1Fj7CTrRcPne3SvQ8jC35OxfyGkBIHSk+ZI/8Q3AC dZnwJbU8Q7r9tAu5W1GJEDPrQDZFx3KXkITOP4PuojNPvFKhENVS0G5fxBomhCK+lv bwzOaT6OnOvig== From: Alexey Gladkov To: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev Cc: "Alexey Gladkov (Intel)" , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , "H. Peter Anvin" , "Kirill A. Shutemov" , Andrew Morton , Yuan Yao , Geert Uytterhoeven , Yuntao Wang , Kai Huang , Baoquan He , Oleg Nesterov , cho@microsoft.com, decui@microsoft.com, John.Starks@microsoft.com Subject: [PATCH v4 3/6] x86/tdx: Allow MMIO from userspace Date: Wed, 21 Aug 2024 16:24:35 +0200 Message-ID: <41b77c0ad25ceed006d59ba259b3c2f0a7ee74e5.1724248680.git.legion@kernel.org> X-Mailer: git-send-email 2.46.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Alexey Gladkov (Intel)" The MMIO emulation is only allowed for kernel space code. It is carried out through a special API, which uses only certain instructions. This does not allow userspace to work with virtual devices. Allow userspace to use the same instructions as kernel space to access MMIO. Additional checks have been added previously. 
Reviewed-by: Thomas Gleixner Signed-off-by: Alexey Gladkov (Intel) --- arch/x86/coco/tdx/tdx.c | 43 +++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 99634e12f9a7..5d2d07aa08ce 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -487,6 +487,32 @@ static int valid_vaddr(struct ve_info *ve, enum insn_m= mio_type mmio, int size, return 0; } =20 +static int decode_insn_struct(struct insn *insn, struct pt_regs *regs) +{ + char buffer[MAX_INSN_SIZE]; + + if (user_mode(regs)) { + int nr_copied =3D insn_fetch_from_user(regs, buffer); + + if (nr_copied <=3D 0) + return -EFAULT; + + if (!insn_decode_from_regs(insn, regs, buffer, nr_copied)) + return -EINVAL; + } else { + if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE)) + return -EFAULT; + + if (insn_decode(insn, buffer, MAX_INSN_SIZE, INSN_MODE_64)) + return -EINVAL; + } + + if (!insn->immediate.got) + return -EINVAL; + + return 0; +} + static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, = int size, struct pt_regs *regs, struct ve_info *ve) { @@ -567,21 +593,14 @@ static int handle_mmio_read(struct insn *insn, enum i= nsn_mmio_type mmio, int siz =20 static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) { - char buffer[MAX_INSN_SIZE]; enum insn_mmio_type mmio; struct insn insn =3D {}; unsigned long vaddr; int size, ret; =20 - /* Only in-kernel MMIO is supported */ - if (WARN_ON_ONCE(user_mode(regs))) - return -EFAULT; - - if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE)) - return -EFAULT; - - if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64)) - return -EINVAL; + ret =3D decode_insn_struct(&insn, regs); + if (ret) + return ret; =20 mmio =3D insn_decode_mmio(&insn, &size); if (WARN_ON_ONCE(mmio =3D=3D INSN_MMIO_DECODE_FAILED)) @@ -777,6 +796,10 @@ static int virt_exception_user(struct pt_regs *regs, s= truct ve_info 
*ve) switch (ve->exit_reason) { case EXIT_REASON_CPUID: return handle_cpuid(regs, ve); + case EXIT_REASON_EPT_VIOLATION: + if (is_private_gpa(ve->gpa)) + panic("Unexpected EPT-violation on private memory."); + return handle_mmio(regs, ve); default: pr_warn("Unexpected #VE: %lld\n", ve->exit_reason); return -EIO; --=20 2.45.2 From nobody Wed Feb 11 05:03:29 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1745A1B1D7F; Wed, 21 Aug 2024 14:26:03 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1724250364; cv=none; b=sa6l4x7Ep8jR54ouxw1++6egR9ueFdgB2H7xJU2rqVf8vApMbflcORFRwLgzBGtNDBNy6hnwaUHOQDzYzQ3Bi5G8yC4bhIqJ6Y8hMvPKNRuYMcApF7uTAB986ibaMPgq6A2dFzRhG2SDSZ5cUPXV2mpp5FPdfzURFaXiiLOh7LU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1724250364; c=relaxed/simple; bh=YU4h73oQw0zQKde8/rV+ORs8akZE2n3EpYgUMr0XtWQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=R9aPbA7l5pgbL2VxbADu5itKUihU8sXOQ/EMkYDTvRzTHHqMAsB7j5JJkqMCeJsESqCVw0sEEQ+nNzO7rsbcWb0kyjw4b44fRGRJLK7Zt64VncIYnqyq4s/bln/K/awwwsg5lsHYKX+DhNsXDJMGRUJt86TpvYa7pYdYO5shtxM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=kEsAjVe3; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="kEsAjVe3" Received: by smtp.kernel.org (Postfix) with ESMTPSA id B0394C32781; Wed, 21 Aug 2024 14:25:59 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1724250363; bh=YU4h73oQw0zQKde8/rV+ORs8akZE2n3EpYgUMr0XtWQ=; 
h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=kEsAjVe3G1eVVSwsbUTOnJOGWQXntakFxgOSI2/AW/LGrX2JadlvTdK4tPT1bddai TEn5BXAYgXmgYZcNjJUaTDCys5eVfBJvQ9wO1RWGbZ99ZOpI1RbNNF4HawyHikhZ/K bQAWNislGaPYa/y1hImylpapKL13DuyjzB1kPCVCei2zbxiouB0L0z0hcngS4m/CYO IyN4UMqrVo1YU6S8pcoFLDe/KQ2VsbK3S1e2C1lDy2CFloTnHGkfiDj7Ixr6JtoLXz CBlEl+mFXs0XJvEhf0F54QqY4en10SieLefPZXCXgn2y+nqu426SWeMNuleYE68/eu UmqqRHAvSfgrg== From: Alexey Gladkov To: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev Cc: "Alexey Gladkov (Intel)" , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , "H. Peter Anvin" , "Kirill A. Shutemov" , Andrew Morton , Yuan Yao , Geert Uytterhoeven , Yuntao Wang , Kai Huang , Baoquan He , Oleg Nesterov , cho@microsoft.com, decui@microsoft.com, John.Starks@microsoft.com Subject: [PATCH v4 4/6] x86/tdx: Add a restriction on access to MMIO address Date: Wed, 21 Aug 2024 16:24:36 +0200 Message-ID: <0d4381397c33426bda5d3b0e5541965dcebdf962.1724248680.git.legion@kernel.org> X-Mailer: git-send-email 2.46.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Alexey Gladkov (Intel)" In the case of userspace MMIO, if the user instruction + MAX_INSN_SIZE straddles page, then the "fetch" in the kernel could trigger a #VE. In this case the kernel would handle this second #VE as a !user_mode() MMIO. That way, additional address verifications can be avoided. The scenario of accessing userspace MMIO addresses from kernelspace does not seem appropriate under normal circumstances. Until there is a specific usecase for such a scenario it can be disabled. 
Signed-off-by: Alexey Gladkov (Intel) --- arch/x86/coco/tdx/tdx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 5d2d07aa08ce..65f65015238a 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -411,6 +411,11 @@ static inline bool is_private_gpa(u64 gpa) return gpa =3D=3D cc_mkenc(gpa); } =20 +static inline bool is_kernel_addr(unsigned long addr) +{ + return (long)addr < 0; +} + static int get_phys_addr(unsigned long addr, phys_addr_t *phys_addr, bool = *writable) { unsigned int level; @@ -606,6 +611,11 @@ static int handle_mmio(struct pt_regs *regs, struct ve= _info *ve) if (WARN_ON_ONCE(mmio =3D=3D INSN_MMIO_DECODE_FAILED)) return -EINVAL; =20 + if (!user_mode(regs) && !is_kernel_addr(ve->gla)) { + WARN_ONCE(1, "Access to userspace address is not supported"); + return -EINVAL; + } + vaddr =3D (unsigned long)insn_get_addr_ref(&insn, regs); =20 if (user_mode(regs)) { --=20 2.45.2 From nobody Wed Feb 11 05:03:29 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7AE1B1B1D7F; Wed, 21 Aug 2024 14:26:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1724250368; cv=none; b=hTC3SSPwduU6BwholIkRCF7M9UrXTPeuCSTaVTeaXLtwLdcVfcAQDnmJ3bgELb/+7TIt+Dp0vqtVydQe1o1MVEyJbidipuKlAQkFRIjrUjIKmBf9PpgZlnOmqg91/0mtRFrBwhDlhJa2sS3dAREoF9OL+Hr3+kbQo7HYnvrIiVU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1724250368; c=relaxed/simple; bh=q+E3FUZqhc/tRBNnL32xzsatvfNuIAGK65nZ+l4lVX4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=ZlSxdyZ2GMtr5Sf09y+aEXuZyUWN4ecNwIqfvaHWnliI9KuGWJBpu7ISF0xLHajeoth8MsJu3qg14bCD+li/b74UDzZQ/djdh4tCktCJM1iFnp+LiOmtOC1hnLlzGCaYO3LROqTjmpV/l6dq1piSgu7xDud8uVHMGkgG4heVzvw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=VAWZh8Ef; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="VAWZh8Ef" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 17FCAC4AF09; Wed, 21 Aug 2024 14:26:03 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1724250368; bh=q+E3FUZqhc/tRBNnL32xzsatvfNuIAGK65nZ+l4lVX4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=VAWZh8EfHyTGCmpnDggHVRVLJFM1VNqej0s4mLmXvgZHh6X3kYhw8RSxKqDerrlKe 1O/IaNK6cVA7gzxddubGgz+mDFlnw7u7YUMQoP401BLSV0ozJhEUmYmlitWsBJ+jfe IuyPSPhZSVFXZGa0Vnjtc6ANr+LIVQ51+kgVgtJBPYN2Mu5d1hKdAj4ksmajs4A920 K3yiXzsjRf11QMYpVNeZ01C0b1p4bfwj2/1gkWln8HNLIfvz+26t5jtt6rOsXHslO5 4YEdG5Qf1V8t9MOgy2prK7HgmWUmtwC/sE/S8/k8di+ythq9FuCbKFz20EBR+ps1/H EYeAhpACxuljw== From: Alexey Gladkov To: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev Cc: "Alexey Gladkov (Intel)" , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , "H. Peter Anvin" , "Kirill A. 
Shutemov" , Andrew Morton , Yuan Yao , Geert Uytterhoeven , Yuntao Wang , Kai Huang , Baoquan He , Oleg Nesterov , cho@microsoft.com, decui@microsoft.com, John.Starks@microsoft.com Subject: [PATCH v4 5/6] x86/tdx: Move MMIO helpers to common library Date: Wed, 21 Aug 2024 16:24:37 +0200 Message-ID: <125b648888ea976e3b8d10df70e40f03a9ca35d6.1724248680.git.legion@kernel.org> X-Mailer: git-send-email 2.46.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Alexey Gladkov (Intel)" AMD code has helpers that are used to emulate MOVS instructions. To be able to reuse this code in the MOVS implementation for Intel, it is necessary to move them to a common location. Signed-off-by: Alexey Gladkov (Intel) --- arch/x86/coco/sev/core.c | 135 ++++---------------------------------- arch/x86/include/asm/io.h | 3 + arch/x86/lib/iomem.c | 125 +++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 121 deletions(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 082d61d85dfc..0e10c22c5347 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -369,72 +369,18 @@ static enum es_result vc_decode_insn(struct es_em_ctx= t *ctxt) static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, char *dst, char *buf, size_t size) { - unsigned long error_code =3D X86_PF_PROT | X86_PF_WRITE; - - /* - * This function uses __put_user() independent of whether kernel or user - * memory is accessed. This works fine because __put_user() does no - * sanity checks of the pointer being accessed. All that it does is - * to report when the access failed. - * - * Also, this function runs in atomic context, so __put_user() is not - * allowed to sleep. 
The page-fault handler detects that it is running - * in atomic context and will not try to take mmap_sem and handle the - * fault, so additional pagefault_enable()/disable() calls are not - * needed. - * - * The access can't be done via copy_to_user() here because - * vc_write_mem() must not use string instructions to access unsafe - * memory. The reason is that MOVS is emulated by the #VC handler by - * splitting the move up into a read and a write and taking a nested #VC - * exception on whatever of them is the MMIO access. Using string - * instructions here would cause infinite nesting. - */ - switch (size) { - case 1: { - u8 d1; - u8 __user *target =3D (u8 __user *)dst; - - memcpy(&d1, buf, 1); - if (__put_user(d1, target)) - goto fault; - break; - } - case 2: { - u16 d2; - u16 __user *target =3D (u16 __user *)dst; - - memcpy(&d2, buf, 2); - if (__put_user(d2, target)) - goto fault; - break; - } - case 4: { - u32 d4; - u32 __user *target =3D (u32 __user *)dst; + unsigned long error_code; + int ret; =20 - memcpy(&d4, buf, 4); - if (__put_user(d4, target)) - goto fault; - break; - } - case 8: { - u64 d8; - u64 __user *target =3D (u64 __user *)dst; + ret =3D __put_iomem(dst, buf, size); + if (!ret) + return ES_OK; =20 - memcpy(&d8, buf, 8); - if (__put_user(d8, target)) - goto fault; - break; - } - default: - WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); + if (ret =3D=3D -EIO) return ES_UNSUPPORTED; - } =20 - return ES_OK; + error_code =3D X86_PF_PROT | X86_PF_WRITE; =20 -fault: if (user_mode(ctxt->regs)) error_code |=3D X86_PF_USER; =20 @@ -448,71 +394,18 @@ static enum es_result vc_write_mem(struct es_em_ctxt = *ctxt, static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, char *src, char *buf, size_t size) { - unsigned long error_code =3D X86_PF_PROT; - - /* - * This function uses __get_user() independent of whether kernel or user - * memory is accessed. This works fine because __get_user() does no - * sanity checks of the pointer being accessed. 
All that it does is - * to report when the access failed. - * - * Also, this function runs in atomic context, so __get_user() is not - * allowed to sleep. The page-fault handler detects that it is running - * in atomic context and will not try to take mmap_sem and handle the - * fault, so additional pagefault_enable()/disable() calls are not - * needed. - * - * The access can't be done via copy_from_user() here because - * vc_read_mem() must not use string instructions to access unsafe - * memory. The reason is that MOVS is emulated by the #VC handler by - * splitting the move up into a read and a write and taking a nested #VC - * exception on whatever of them is the MMIO access. Using string - * instructions here would cause infinite nesting. - */ - switch (size) { - case 1: { - u8 d1; - u8 __user *s =3D (u8 __user *)src; - - if (__get_user(d1, s)) - goto fault; - memcpy(buf, &d1, 1); - break; - } - case 2: { - u16 d2; - u16 __user *s =3D (u16 __user *)src; + unsigned long error_code; + int ret; =20 - if (__get_user(d2, s)) - goto fault; - memcpy(buf, &d2, 2); - break; - } - case 4: { - u32 d4; - u32 __user *s =3D (u32 __user *)src; + ret =3D __get_iomem(src, buf, size); + if (!ret) + return ES_OK; =20 - if (__get_user(d4, s)) - goto fault; - memcpy(buf, &d4, 4); - break; - } - case 8: { - u64 d8; - u64 __user *s =3D (u64 __user *)src; - if (__get_user(d8, s)) - goto fault; - memcpy(buf, &d8, 8); - break; - } - default: - WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); + if (ret =3D=3D -EIO) return ES_UNSUPPORTED; - } =20 - return ES_OK; + error_code =3D X86_PF_PROT; =20 -fault: if (user_mode(ctxt->regs)) error_code |=3D X86_PF_USER; =20 diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 1d60427379c9..ac01d53466cb 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -402,4 +402,7 @@ static inline void iosubmit_cmds512(void __iomem *dst, = const void *src, } } =20 +int __get_iomem(char *src, char *buf, size_t 
size); +int __put_iomem(char *src, char *buf, size_t size); + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c index 5eecb45d05d5..23179953eb5a 100644 --- a/arch/x86/lib/iomem.c +++ b/arch/x86/lib/iomem.c @@ -2,6 +2,7 @@ #include #include #include +#include =20 #define movs(type,to,from) \ asm volatile("movs" type:"=3D&D" (to), "=3D&S" (from):"0" (to), "1" (from= ):"memory") @@ -124,3 +125,127 @@ void memset_io(volatile void __iomem *a, int b, size_= t c) } } EXPORT_SYMBOL(memset_io); + +int __get_iomem(char *src, char *buf, size_t size) +{ + /* + * This function uses __get_user() independent of whether kernel or user + * memory is accessed. This works fine because __get_user() does no + * sanity checks of the pointer being accessed. All that it does is + * to report when the access failed. + * + * Also, this function runs in atomic context, so __get_user() is not + * allowed to sleep. The page-fault handler detects that it is running + * in atomic context and will not try to take mmap_sem and handle the + * fault, so additional pagefault_enable()/disable() calls are not + * needed. + * + * The access can't be done via copy_from_user() here because + * __get_iomem() must not use string instructions to access unsafe + * memory. The reason is that MOVS is emulated by the #VC handler by + * splitting the move up into a read and a write and taking a nested #VC + * exception on whatever of them is the MMIO access. Using string + * instructions here would cause infinite nesting. 
+ */ + switch (size) { + case 1: { + u8 d1, __user *s =3D (u8 __user *)src; + + if (__get_user(d1, s)) + return -EFAULT; + memcpy(buf, &d1, 1); + break; + } + case 2: { + u16 d2, __user *s =3D (u16 __user *)src; + + if (__get_user(d2, s)) + return -EFAULT; + memcpy(buf, &d2, 2); + break; + } + case 4: { + u32 d4, __user *s =3D (u32 __user *)src; + + if (__get_user(d4, s)) + return -EFAULT; + memcpy(buf, &d4, 4); + break; + } + case 8: { + u64 d8, __user *s =3D (u64 __user *)src; + + if (__get_user(d8, s)) + return -EFAULT; + memcpy(buf, &d8, 8); + break; + } + default: + WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); + return -EIO; + } + + return 0; +} + +int __put_iomem(char *dst, char *buf, size_t size) +{ + /* + * This function uses __put_user() independent of whether kernel or user + * memory is accessed. This works fine because __put_user() does no + * sanity checks of the pointer being accessed. All that it does is + * to report when the access failed. + * + * Also, this function runs in atomic context, so __put_user() is not + * allowed to sleep. The page-fault handler detects that it is running + * in atomic context and will not try to take mmap_sem and handle the + * fault, so additional pagefault_enable()/disable() calls are not + * needed. + * + * The access can't be done via copy_to_user() here because + * __put_iomem() must not use string instructions to access unsafe + * memory. The reason is that MOVS is emulated by the #VC handler by + * splitting the move up into a read and a write and taking a nested #VC + * exception on whatever of them is the MMIO access. Using string + * instructions here would cause infinite nesting. 
+ */ + switch (size) { + case 1: { + u8 d1, __user *target =3D (u8 __user *)dst; + + memcpy(&d1, buf, 1); + if (__put_user(d1, target)) + return -EFAULT; + break; + } + case 2: { + u16 d2, __user *target =3D (u16 __user *)dst; + + memcpy(&d2, buf, 2); + if (__put_user(d2, target)) + return -EFAULT; + break; + } + case 4: { + u32 d4, __user *target =3D (u32 __user *)dst; + + memcpy(&d4, buf, 4); + if (__put_user(d4, target)) + return -EFAULT; + break; + } + case 8: { + u64 d8, __user *target =3D (u64 __user *)dst; + + memcpy(&d8, buf, 8); + if (__put_user(d8, target)) + return -EFAULT; + break; + } + default: + WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); + return -EIO; + } + + return 0; +} --=20 2.45.2 From nobody Wed Feb 11 05:03:29 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7BEC31B3B2A; Wed, 21 Aug 2024 14:26:12 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1724250372; cv=none; b=LaCiMXNjVk9S1BxGboM8qe29Ss+9bAC1ky2CFLq41HKedV2xmnnNRhMjHQPiwETL4v342cxTJRY0q5knPB89nI6rd961QZCl2MdmheUByEgTC7y2jHdU7X3nZUfvyILijLMrC3keFQf02G6PLn9sWkEeg/TNxNCFdORT1KhToBw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1724250372; c=relaxed/simple; bh=sqVXi89tYI0s7IoD92jonGg926UqsYeyDX6OHzOQkVo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=gIC1diRywrRJSG0oIhzdot+N0QHEoZQT2D30zFSnqMScDq5OK9f6WNXFkGm1emxNjgi5zGpnfY8S+PqikeG3BlezhtNJn4YRvo+Qj/QdTtad9oS9x81h6O6GgJp7fuOgDrNYiFdN1GTGpqFOwTobHmPBCoHjkX1RNxbPHKX34Ys= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=rX+iGplY; arc=none 
smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="rX+iGplY" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7113EC32781; Wed, 21 Aug 2024 14:26:08 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1724250372; bh=sqVXi89tYI0s7IoD92jonGg926UqsYeyDX6OHzOQkVo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=rX+iGplYwaRwPZ3u5hFLSpySpuM7etMXZeMR0dg9CqRY4P3PkUlYmmomke28wJF5X TTDlR+7andARIC6sKyvg4yCFiXPL/IYsLTYRyKAogE/ewRFnFP8HxVnfWFapkyhbeG +gvc6VwEqlM3WjBncVN+Q/qOKlRpbdBjVoXWIjLm8eZ6COuTYS4djJ0uCXUBOxVuss 5Q6eJkbuzvtNflfCPdAvNNPQnB3mCCurL5sccdoTeyZCMxJNb3O7J6v+lTN0ZfV7hh tgfyQUUWUaik0eJheuU3Wa+il8fKmT5JrLAzf85lDUswYWlhDt5LZeh2dg8XzfPxx/ CXj0iaUX0nFnQ== From: Alexey Gladkov To: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev Cc: "Alexey Gladkov (Intel)" , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , "H. Peter Anvin" , "Kirill A. Shutemov" , Andrew Morton , Yuan Yao , Geert Uytterhoeven , Yuntao Wang , Kai Huang , Baoquan He , Oleg Nesterov , cho@microsoft.com, decui@microsoft.com, John.Starks@microsoft.com Subject: [PATCH v4 6/6] x86/tdx: Implement movs for MMIO Date: Wed, 21 Aug 2024 16:24:38 +0200 Message-ID: <9320e721e609e55a020d3eb98f48fc856371c561.1724248680.git.legion@kernel.org> X-Mailer: git-send-email 2.46.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Alexey Gladkov (Intel)" Add emulation of the MOVS instruction on MMIO regions. MOVS emulation consists of dividing it into a series of read and write operations, which in turn will be validated separately. 
Signed-off-by: Alexey Gladkov (Intel) --- arch/x86/coco/tdx/tdx.c | 84 +++++++++++++++++++++++++++++--- arch/x86/include/asm/processor.h | 4 ++ 2 files changed, 80 insertions(+), 8 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 65f65015238a..d4bec84de034 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -518,6 +518,62 @@ static int decode_insn_struct(struct insn *insn, struc= t pt_regs *regs) return 0; } =20 +static int handle_mmio_movs(struct insn *insn, struct pt_regs *regs, int s= ize, struct ve_info *ve) +{ + unsigned long ds_base, es_base; + unsigned char *src, *dst; + unsigned char buffer[8]; + int off, ret; + bool rep; + + /* + * The in-kernel code must use a special API that does not use MOVS. + * If the MOVS instruction is received from in-kernel, then something + * is broken. + */ + if (WARN_ON_ONCE(!user_mode(regs))) + return -EFAULT; + + ds_base =3D insn_get_seg_base(regs, INAT_SEG_REG_DS); + es_base =3D insn_get_seg_base(regs, INAT_SEG_REG_ES); + + if (ds_base =3D=3D -1L || es_base =3D=3D -1L) + return -EINVAL; + + rep =3D insn_has_rep_prefix(insn); + + do { + src =3D ds_base + (unsigned char *) regs->si; + dst =3D es_base + (unsigned char *) regs->di; + + current->thread.mmio_emul =3D (unsigned long) src; + + ret =3D __get_iomem(src, buffer, size); + if (ret) + goto out; + + current->thread.mmio_emul =3D (unsigned long) dst; + + ret =3D __put_iomem(dst, buffer, size); + if (ret) + goto out; + + off =3D (regs->flags & X86_EFLAGS_DF) ? 
-size : size; + + regs->si +=3D off; + regs->di +=3D off; + + if (rep) + regs->cx -=3D 1; + } while (rep && regs->cx > 0); /* plain MOVS moves one element; REP MOVS stops once CX reaches 0 */ + + ret =3D insn->length; +out: + current->thread.mmio_emul =3D 0; + + return ret; +} + static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, = int size, struct pt_regs *regs, struct ve_info *ve) { @@ -539,9 +595,8 @@ static int handle_mmio_write(struct insn *insn, enum in= sn_mmio_type mmio, int si return insn->length; case INSN_MMIO_MOVS: /* - * MMIO was accessed with an instruction that could not be - * decoded or handled properly. It was likely not using io.h - * helpers or accessed MMIO accidentally. + * MOVS is processed through higher level emulation which breaks + * this instruction into a sequence of reads and writes. */ return -EINVAL; default: @@ -600,6 +655,7 @@ static int handle_mmio(struct pt_regs *regs, struct ve_= info *ve) { enum insn_mmio_type mmio; struct insn insn =3D {}; + int need_validation; unsigned long vaddr; int size, ret; =20 @@ -611,14 +667,27 @@ static int handle_mmio(struct pt_regs *regs, struct v= e_info *ve) if (WARN_ON_ONCE(mmio =3D=3D INSN_MMIO_DECODE_FAILED)) return -EINVAL; =20 + if (mmio =3D=3D INSN_MMIO_MOVS) + return handle_mmio_movs(&insn, regs, size, ve); + + need_validation =3D user_mode(regs); + if (!user_mode(regs) && !is_kernel_addr(ve->gla)) { - WARN_ONCE(1, "Access to userspace address is not supported"); - return -EINVAL; + /* + * Access from kernel to userspace addresses is not allowed + * unless it is a nested exception during MOVS emulation. 
+ */ + if (current->thread.mmio_emul !=3D ve->gla || !current->mm) { + WARN_ONCE(1, "Access to userspace address is not supported"); + return -EINVAL; + } + + need_validation =3D 1; } =20 vaddr =3D (unsigned long)insn_get_addr_ref(&insn, regs); =20 - if (user_mode(regs)) { + if (need_validation) { if (mmap_read_lock_killable(current->mm)) return -EINTR; =20 @@ -644,7 +713,6 @@ static int handle_mmio(struct pt_regs *regs, struct ve_= info *ve) switch (mmio) { case INSN_MMIO_WRITE: case INSN_MMIO_WRITE_IMM: - case INSN_MMIO_MOVS: ret =3D handle_mmio_write(&insn, mmio, size, regs, ve); break; case INSN_MMIO_READ: @@ -665,7 +733,7 @@ static int handle_mmio(struct pt_regs *regs, struct ve_= info *ve) ret =3D -EINVAL; } unlock: - if (user_mode(regs)) + if (need_validation) mmap_read_unlock(current->mm); =20 return ret; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/proces= sor.h index a75a07f4931f..45136b1b02cc 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -503,6 +503,10 @@ struct thread_struct { struct thread_shstk shstk; #endif =20 +#ifdef CONFIG_INTEL_TDX_GUEST + unsigned long mmio_emul; +#endif + /* Floating point and extended processor state */ struct fpu fpu; /* --=20 2.45.2