From nobody Fri Dec 19 22:01:07 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 452A91A76C6; Tue, 30 Jul 2024 17:36:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722360980; cv=none; b=s8HhATrQjDll7CiJ0Gu/0c+RfA8FnLrtmOc4enAsXJpCwcjRdvvdQ33yXH2ygt0stxUU6Ixbc4mM+L5Tde/MlN+RBbvrSHH12gxeoLWlgi/L+hrBoRhV3gXq6bg/cdmqFFr46t2uxnJGGbqkp6vgub+It/jpmCT96+nqyESI5k4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722360980; c=relaxed/simple; bh=nsTmQqkOfAU54MF4OMcnqCdNe15ILGkA4eZtUW7kCI4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=CcSUXC7VZjaGZxnRRHv9YK59gfN9Bfk2eOQva5y4oiT6IqNo7wKNRET1rL4iRX6+sYtboHLqqqEPMt6PLDIeZDDzCC3TEtw1Aex8Pjki5LXKTy5P+aR9M35Q0BR9RTQO1+sYAMx+eiFmVeqCTG/GOrmGPDImKYOAnztSbbUSndA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=TH/hiiO4; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="TH/hiiO4" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 713C9C4AF0C; Tue, 30 Jul 2024 17:36:16 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722360980; bh=nsTmQqkOfAU54MF4OMcnqCdNe15ILGkA4eZtUW7kCI4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=TH/hiiO4tYA2amyWAf5O0Q+DvMniy6D6MWkoKreDEkjdh1sSguD1Sst88WUDL28An e6miNWpDDLGFE336pa8MbMfH5u0iQo/VikXno2q0r25GZfA0vEDMi7S1vFqlOcK1x6 6rDGxuzapWuClznqUQe3bI06Qi7mYFnJlutn8tXmv9AEeOv1zO4yzxjM3aJoU63G7z jyoPMM9gY/n1K+xEkxJ5KWmGm/fI6v1zU+YGdS4VTVXkRF20tTFAbBQ+t6DdyOQXrl /mHTIm4VytlYr3b1Fldf3mqi4aRSONl5nzvTm1pqAijR+9eZf1xUpDfpxg3dEbgyk7 lUjJqT2G6h7Fg== From: "Alexey Gladkov (Intel)" To: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev Cc: Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , "H. Peter Anvin" , "Kirill A. Shutemov" , Andrew Morton , Yuan Yao , Geert Uytterhoeven , Yuntao Wang , Kai Huang , Baoquan He , Oleg Nesterov , cho@microsoft.com, decui@microsoft.com, John.Starks@microsoft.com Subject: [PATCH v1 1/4] x86/tdx: Split MMIO read and write operations Date: Tue, 30 Jul 2024 19:35:56 +0200 Message-ID: X-Mailer: git-send-email 2.45.2 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" To implement MMIO in userspace, additional memory checks need to be implemented. To avoid overly complicating the handle_mmio() function and to separate checks from actions, it will be split into two separate functions for handling read and write operations. 
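A rough sketch of the intended dispatch after the split (illustrative only; the real helpers and their error handling are in the diff below):

	/* handle_mmio() keeps decode and the common checks, then delegates
	 * to a per-direction helper instead of handling everything inline. */
	switch (mmio) {
	case INSN_MMIO_WRITE:
	case INSN_MMIO_WRITE_IMM:
	case INSN_MMIO_MOVS:
		return handle_mmio_write(&insn, mmio, size, regs, ve);
	case INSN_MMIO_READ:
	case INSN_MMIO_READ_ZERO_EXTEND:
	case INSN_MMIO_READ_SIGN_EXTEND:
		return handle_mmio_read(&insn, mmio, size, regs, ve);
	case INSN_MMIO_DECODE_FAILED:
	default:
		return -EINVAL;
	}
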
Signed-off-by: Alexey Gladkov (Intel) --- arch/x86/coco/tdx/tdx.c | 135 ++++++++++++++++++++++++---------------- 1 file changed, 82 insertions(+), 53 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 078e2bac2553..41b047a08071 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -405,14 +405,90 @@ static bool mmio_write(int size, unsigned long addr, = unsigned long val) EPT_WRITE, addr, val); } =20 +static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, = int size, + struct pt_regs *regs, struct ve_info *ve) +{ + unsigned long *reg, val; + + switch (mmio) { + case INSN_MMIO_WRITE: + reg =3D insn_get_modrm_reg_ptr(insn, regs); + if (!reg) + return -EINVAL; + memcpy(&val, reg, size); + if (!mmio_write(size, ve->gpa, val)) + return -EIO; + return insn->length; + case INSN_MMIO_WRITE_IMM: + val =3D insn->immediate.value; + if (!mmio_write(size, ve->gpa, val)) + return -EIO; + return insn->length; + case INSN_MMIO_MOVS: + /* + * MMIO was accessed with an instruction that could not be + * decoded or handled properly. It was likely not using io.h + * helpers or accessed MMIO accidentally. + */ + return -EINVAL; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + return insn->length; +} + +static int handle_mmio_read(struct insn *insn, enum insn_mmio_type mmio, i= nt size, + struct pt_regs *regs, struct ve_info *ve) +{ + unsigned long *reg, val; + int extend_size; + u8 extend_val =3D 0; + + reg =3D insn_get_modrm_reg_ptr(insn, regs); + if (!reg) + return -EINVAL; + + /* Handle reads */ + if (!mmio_read(size, ve->gpa, &val)) + return -EIO; + + switch (mmio) { + case INSN_MMIO_READ: + /* Zero-extend for 32-bit operation */ + extend_size =3D size =3D=3D 4 ? sizeof(*reg) : 0; + break; + case INSN_MMIO_READ_ZERO_EXTEND: + /* Zero extend based on operand size */ + extend_size =3D insn->opnd_bytes; + break; + case INSN_MMIO_READ_SIGN_EXTEND: + /* Sign extend based on operand size */ + extend_size =3D insn->opnd_bytes; + if (size =3D=3D 1 && val & BIT(7)) + extend_val =3D 0xFF; + else if (size > 1 && val & BIT(15)) + extend_val =3D 0xFF; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + if (extend_size) + memset(reg, extend_val, extend_size); + memcpy(reg, &val, size); + return insn->length; +} + static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) { - unsigned long *reg, val, vaddr; + unsigned long vaddr; char buffer[MAX_INSN_SIZE]; enum insn_mmio_type mmio; struct insn insn =3D {}; - int size, extend_size; - u8 extend_val =3D 0; + int size; =20 /* Only in-kernel MMIO is supported */ if (WARN_ON_ONCE(user_mode(regs))) @@ -428,12 +504,6 @@ static int handle_mmio(struct pt_regs *regs, struct ve= _info *ve) if (WARN_ON_ONCE(mmio =3D=3D INSN_MMIO_DECODE_FAILED)) return -EINVAL; =20 - if (mmio !=3D INSN_MMIO_WRITE_IMM && mmio !=3D INSN_MMIO_MOVS) { - reg =3D insn_get_modrm_reg_ptr(&insn, regs); - if (!reg) - return -EINVAL; - } - /* * Reject EPT violation #VEs that split pages. 
* @@ -447,24 +517,15 @@ static int handle_mmio(struct pt_regs *regs, struct v= e_info *ve) if (vaddr / PAGE_SIZE !=3D (vaddr + size - 1) / PAGE_SIZE) return -EFAULT; =20 - /* Handle writes first */ switch (mmio) { case INSN_MMIO_WRITE: - memcpy(&val, reg, size); - if (!mmio_write(size, ve->gpa, val)) - return -EIO; - return insn.length; case INSN_MMIO_WRITE_IMM: - val =3D insn.immediate.value; - if (!mmio_write(size, ve->gpa, val)) - return -EIO; - return insn.length; + case INSN_MMIO_MOVS: + return handle_mmio_write(&insn, mmio, size, regs, ve); case INSN_MMIO_READ: case INSN_MMIO_READ_ZERO_EXTEND: case INSN_MMIO_READ_SIGN_EXTEND: - /* Reads are handled below */ - break; - case INSN_MMIO_MOVS: + return handle_mmio_read(&insn, mmio, size, regs, ve); case INSN_MMIO_DECODE_FAILED: /* * MMIO was accessed with an instruction that could not be @@ -476,38 +537,6 @@ static int handle_mmio(struct pt_regs *regs, struct ve= _info *ve) WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?"); return -EINVAL; } - - /* Handle reads */ - if (!mmio_read(size, ve->gpa, &val)) - return -EIO; - - switch (mmio) { - case INSN_MMIO_READ: - /* Zero-extend for 32-bit operation */ - extend_size =3D size =3D=3D 4 ? sizeof(*reg) : 0; - break; - case INSN_MMIO_READ_ZERO_EXTEND: - /* Zero extend based on operand size */ - extend_size =3D insn.opnd_bytes; - break; - case INSN_MMIO_READ_SIGN_EXTEND: - /* Sign extend based on operand size */ - extend_size =3D insn.opnd_bytes; - if (size =3D=3D 1 && val & BIT(7)) - extend_val =3D 0xFF; - else if (size > 1 && val & BIT(15)) - extend_val =3D 0xFF; - break; - default: - /* All other cases has to be covered with the first switch() */ - WARN_ON_ONCE(1); - return -EINVAL; - } - - if (extend_size) - memset(reg, extend_val, extend_size); - memcpy(reg, &val, size); - return insn.length; } =20 static bool handle_in(struct pt_regs *regs, int size, int port) --=20 2.45.2 From nobody Fri Dec 19 22:01:07 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5680B1A76C6; Tue, 30 Jul 2024 17:36:24 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722360984; cv=none; b=sMSc7H5FdzLan85Ih9FjEKvsbZE1Pzw0RVen2lZxCrMt+XRk5USfeTbO048LaxdK2jlCt3Oz/fdP12EwN46JSuzCuXPftaiKh9rl+XfRXhxwcyyHEjNX9m3K01pZMX2iYSbBmXcmrDxUuFrRnKXZoIN0xZdr2nTegfIJ9R8moqQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722360984; c=relaxed/simple; bh=Hs3tJ4z//lyAVWH2TvPhdN2PkFQg6LahqyvyViQBvLw=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=mmU8T5AGVykaBQShrxFX0E9GM1Bji64UvRV6bqcsPMkXIv+mbZPOim8HCLKaddLeZvk/h4c8iR7wFB3lb0e6xWO37D1dN9pOtdUB0QBeHKlywIEie2dRAKsS1AFIDaMRKmwznJ+aRnRjY4cPzMf4+iP4IIRHsDedlf6hE+Rstmc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=lVEj21M4; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="lVEj21M4" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 81F49C32782; Tue, 30 Jul 2024 17:36:20 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722360984; 
bh=Hs3tJ4z//lyAVWH2TvPhdN2PkFQg6LahqyvyViQBvLw=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=lVEj21M4BzNeo1wzr/9YAlxMj/rHNlRk7R/4Owobv4gwlM7nK2jmlSh/JXvvBRJe2 5SxKv4oARszfNg4dfVWhP1zbbm9q28eHWE7ovhPS2rAR9kndclgt/9DTm1R8rIBE0Q kA0hW57WNiE+3Zrib0pZvtDLZeNTjoJIyDxomuB4vjUZm+u+54Vtrl9SPeCWIod8wC FuiiXCAPoxGpy7aNNUomoNGMeQmCjO25iRGg7o7TpDPdBPSnvrWXYj+1KlBIXn4g13 WVU2CL0EvOvGw3zdsoCKp2F+B3VnXf2uB5UIk9G/064yieJLZ5IK4gKZQkoKxNdyGz 9NkcnecsPu9hQ== From: "Alexey Gladkov (Intel)" To: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev Cc: Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , "H. Peter Anvin" , "Kirill A. Shutemov" , Andrew Morton , Yuan Yao , Geert Uytterhoeven , Yuntao Wang , Kai Huang , Baoquan He , Oleg Nesterov , cho@microsoft.com, decui@microsoft.com, John.Starks@microsoft.com Subject: [PATCH v1 2/4] x86/tdx: Add validation of userspace MMIO instructions Date: Tue, 30 Jul 2024 19:35:57 +0200 Message-ID: <855a5d97f0e76373e76c5124f415d3abd7ae3699.1722356794.git.legion@kernel.org> X-Mailer: git-send-email 2.45.2 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Instructions from kernel space are considered trusted. If the MMIO instruction is from userspace it must be checked. For userspace instructions, it is need to check that the INSN has not changed at the time of #VE and before the execution of the instruction. Once the userspace instruction parsed is enforced that the address points to mapped memory of current process and that address does not point to private memory. After parsing the userspace instruction, it is necessary to ensure that: 1. the operation direction (read/write) corresponds to #VE info; 2. the address still points to mapped memory of current process; 3. the address does not point to private memory. Signed-off-by: Alexey Gladkov (Intel) --- arch/x86/coco/tdx/tdx.c | 118 +++++++++++++++++++++++++++++++++++----- 1 file changed, 105 insertions(+), 13 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 41b047a08071..8c894ee9c245 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -405,6 +406,74 @@ static bool mmio_write(int size, unsigned long addr, u= nsigned long val) EPT_WRITE, addr, val); } =20 +static inline bool is_private_gpa(u64 gpa) +{ + return gpa =3D=3D cc_mkenc(gpa); +} + +static int get_phys_addr(unsigned long addr, phys_addr_t *phys_addr, bool = *writable) +{ + unsigned int level; + pgd_t *pgdp; + pte_t *ptep; + + /* + * Address validation only makes sense for a user process. The lock must + * be obtained before validation can begin. + */ + mmap_assert_locked(current->mm); + + pgdp =3D pgd_offset(current->mm, addr); + + if (!pgd_none(*pgdp)) { + ptep =3D lookup_address_in_pgd(pgdp, addr, &level); + if (ptep) { + unsigned long offset; + + offset =3D addr & ~page_level_mask(level); + *phys_addr =3D PFN_PHYS(pte_pfn(*ptep)); + *phys_addr |=3D offset; + + *writable =3D pte_write(*ptep); + + return 0; + } + } + + return -EFAULT; +} + +static int valid_vaddr(struct ve_info *ve, enum insn_mmio_type mmio, int s= ize, + unsigned long vaddr) +{ + phys_addr_t phys_addr; + bool writable =3D false; + + /* It's not fatal. This can happen due to swap out or page migration. 
*/ + if (get_phys_addr(vaddr, &phys_addr, &writable) || (ve->gpa !=3D cc_mkdec= (phys_addr))) + return -EAGAIN; + + /* Check whether #VE info matches the instruction that was decoded. */ + switch (mmio) { + case INSN_MMIO_WRITE: + case INSN_MMIO_WRITE_IMM: + if (!writable || !(ve->exit_qual & EPT_VIOLATION_ACC_WRITE)) + return -EFAULT; + break; + case INSN_MMIO_READ: + case INSN_MMIO_READ_ZERO_EXTEND: + case INSN_MMIO_READ_SIGN_EXTEND: + if (!(ve->exit_qual & EPT_VIOLATION_ACC_READ)) + return -EFAULT; + break; + default: + WARN_ONCE(1, "Unsupported mmio instruction: %d", mmio); + return -EINVAL; + } + + return 0; +} + static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, = int size, struct pt_regs *regs, struct ve_info *ve) { @@ -488,7 +557,7 @@ static int handle_mmio(struct pt_regs *regs, struct ve_= info *ve) char buffer[MAX_INSN_SIZE]; enum insn_mmio_type mmio; struct insn insn =3D {}; - int size; + int size, ret; =20 /* Only in-kernel MMIO is supported */ if (WARN_ON_ONCE(user_mode(regs))) @@ -504,6 +573,17 @@ static int handle_mmio(struct pt_regs *regs, struct ve= _info *ve) if (WARN_ON_ONCE(mmio =3D=3D INSN_MMIO_DECODE_FAILED)) return -EINVAL; =20 + vaddr =3D (unsigned long)insn_get_addr_ref(&insn, regs); + + if (user_mode(regs)) { + if (mmap_read_lock_killable(current->mm)) + return -EINTR; + + ret =3D valid_vaddr(ve, mmio, size, vaddr); + if (ret) + goto fault; + } + /* * Reject EPT violation #VEs that split pages. * @@ -513,30 +593,39 @@ static int handle_mmio(struct pt_regs *regs, struct v= e_info *ve) * * load_unaligned_zeropad() will recover using exception fixups. */ - vaddr =3D (unsigned long)insn_get_addr_ref(&insn, regs); - if (vaddr / PAGE_SIZE !=3D (vaddr + size - 1) / PAGE_SIZE) - return -EFAULT; + if (vaddr / PAGE_SIZE !=3D (vaddr + size - 1) / PAGE_SIZE) { + ret =3D -EFAULT; + goto fault; + } =20 switch (mmio) { case INSN_MMIO_WRITE: case INSN_MMIO_WRITE_IMM: case INSN_MMIO_MOVS: - return handle_mmio_write(&insn, mmio, size, regs, ve); + ret =3D handle_mmio_write(&insn, mmio, size, regs, ve); + break; case INSN_MMIO_READ: case INSN_MMIO_READ_ZERO_EXTEND: case INSN_MMIO_READ_SIGN_EXTEND: - return handle_mmio_read(&insn, mmio, size, regs, ve); + ret =3D handle_mmio_read(&insn, mmio, size, regs, ve); + break; case INSN_MMIO_DECODE_FAILED: /* * MMIO was accessed with an instruction that could not be * decoded or handled properly. It was likely not using io.h * helpers or accessed MMIO accidentally. */ - return -EINVAL; + ret =3D -EINVAL; + break; default: WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?"); - return -EINVAL; + ret =3D -EINVAL; } +fault: + if (user_mode(regs)) + mmap_read_unlock(current->mm); + + return ret; } =20 static bool handle_in(struct pt_regs *regs, int size, int port) @@ -680,11 +769,6 @@ static int virt_exception_user(struct pt_regs *regs, s= truct ve_info *ve) } } =20 -static inline bool is_private_gpa(u64 gpa) -{ - return gpa =3D=3D cc_mkenc(gpa); -} - /* * Handle the kernel #VE. * @@ -722,6 +806,14 @@ bool tdx_handle_virt_exception(struct pt_regs *regs, s= truct ve_info *ve) insn_len =3D virt_exception_user(regs, ve); else insn_len =3D virt_exception_kernel(regs, ve); + + /* + * A special case to return to userspace without increasing regs->ip + * to repeat the instruction once again. 
+ */ + if (insn_len =3D=3D -EAGAIN) + return true; + if (insn_len < 0) return false; =20 --=20 2.45.2 From nobody Fri Dec 19 22:01:07 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CDB971A76C6; Tue, 30 Jul 2024 17:36:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722360988; cv=none; b=CNCJ1eveKTTqPnQ7Y8IHmapxtclVtwwuLHCDObAONehhkA/k1bHCqbX1I/lRy15yz/fm9Nw8mKtLVnzpKdfK7zTpdpveEf3lJF7UTMVU2E7+Hy+UFvAO9gb5OjUMhkp+A/H092kk8QUSaiHxMfLq7NesdZzXFrmU0vSWEpnmADk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722360988; c=relaxed/simple; bh=xpjzJRz2Yty9K9l0Pcl5A9Xyutcf4oMMCTmjT65gKoo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=cl5UnVGsTujyizaepcpBJ2i3y+TaQsoDL8VQImoGQBKn2vIPOhykhzprmdUYETNPgJCvBLOQtf5eM61LXK8jxbTFDpIlu6KEb0E8r6TA0rTUrhZ7USp+HZ88nlrCJtIWMPLxtdd15J5UQKxT/wRAEL1KYxNn4DFLw5mhDX+txtM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=k5kQo9IO; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="k5kQo9IO" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9259AC32782; Tue, 30 Jul 2024 17:36:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722360988; bh=xpjzJRz2Yty9K9l0Pcl5A9Xyutcf4oMMCTmjT65gKoo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=k5kQo9IOTnf3lKFfFkx74I6jo0FNPImZaWi5l2BKhFsHi22rcZmvb65b/hp5fnriP 8Gd/LBOZQL+7r+HGkaI2wf0yNJJF7XPi5bklM3XWIZyUaUyUo8w6lDociBKsLjoC4O JP3U4uWqNF5hkMCoacj3aT3VCxxuzsEJCYxPP/W/l49ElcVZI8lS9H7qJIbXQyAosz 8I04UTfp65FjT6LSVhQ0BmeAh4/Brg8MBW3swytkXJMRHnIIhaZxwtPYPg6p3c1OVA P6+8NuCajlTvW52bFTyw80Fi++BTZpy5tebNsi2+qQp7+CLJVQuz04Y/Eq/IAFYJBi TzP54GsmTN7OA== From: "Alexey Gladkov (Intel)" To: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev Cc: Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , "H. Peter Anvin" , "Kirill A. Shutemov" , Andrew Morton , Yuan Yao , Geert Uytterhoeven , Yuntao Wang , Kai Huang , Baoquan He , Oleg Nesterov , cho@microsoft.com, decui@microsoft.com, John.Starks@microsoft.com Subject: [PATCH v1 3/4] x86/tdx: Allow MMIO from userspace Date: Tue, 30 Jul 2024 19:35:58 +0200 Message-ID: <0331020dce360b77e40c53dfdb0624574dbb249f.1722356794.git.legion@kernel.org> X-Mailer: git-send-email 2.45.2 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The MMIO emulation is only allowed for kernel space code. It is carried out through a special API, which uses only certain instructions. This does not allow userspace to work with virtual devices. Allow userspace to use the same instructions as kernel space to access MMIO. So far, no additional checks have been made. 
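For illustration, the kind of userspace access this enables (a hypothetical snippet; the device path and BAR layout are made up, and the mapping must be a shared, unencrypted MMIO range for the #VE path to trigger):

	#include <fcntl.h>
	#include <stdint.h>
	#include <sys/mman.h>

	int poke_bar0(void)
	{
		/* Map BAR0 of an assigned device via sysfs (hypothetical device). */
		int fd = open("/sys/bus/pci/devices/0000:00:05.0/resource0", O_RDWR);
		volatile uint32_t *bar;

		if (fd < 0)
			return -1;

		bar = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		if (bar == MAP_FAILED)
			return -1;

		/* Plain MOV store/load: inside a TDX guest these raise #VE and
		 * are now emulated by handle_mmio() instead of being rejected. */
		bar[0] = 0x1;
		return (int)bar[1];
	}
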
Signed-off-by: Alexey Gladkov (Intel) Reviewed-by: Thomas Gleixner --- arch/x86/coco/tdx/tdx.c | 42 +++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 8c894ee9c245..26b2e52457be 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -474,6 +474,31 @@ static int valid_vaddr(struct ve_info *ve, enum insn_m= mio_type mmio, int size, return 0; } =20 +static int decode_insn_struct(struct insn *insn, struct pt_regs *regs) +{ + char buffer[MAX_INSN_SIZE]; + + if (user_mode(regs)) { + int nr_copied =3D insn_fetch_from_user(regs, buffer); + + if (nr_copied <=3D 0) + return -EFAULT; + + if (!insn_decode_from_regs(insn, regs, buffer, nr_copied)) + return -EINVAL; + + if (!insn->immediate.got) + return -EINVAL; + } else { + if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE)) + return -EFAULT; + + if (insn_decode(insn, buffer, MAX_INSN_SIZE, INSN_MODE_64)) + return -EINVAL; + } + return 0; +} + static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, = int size, struct pt_regs *regs, struct ve_info *ve) { @@ -554,20 +579,13 @@ static int handle_mmio_read(struct insn *insn, enum i= nsn_mmio_type mmio, int siz static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) { unsigned long vaddr; - char buffer[MAX_INSN_SIZE]; enum insn_mmio_type mmio; struct insn insn =3D {}; int size, ret; =20 - /* Only in-kernel MMIO is supported */ - if (WARN_ON_ONCE(user_mode(regs))) - return -EFAULT; - - if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE)) - return -EFAULT; - - if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64)) - return -EINVAL; + ret =3D decode_insn_struct(&insn, regs); + if (ret) + return ret; =20 mmio =3D insn_decode_mmio(&insn, &size); if (WARN_ON_ONCE(mmio =3D=3D INSN_MMIO_DECODE_FAILED)) @@ -763,6 +781,10 @@ static int virt_exception_user(struct pt_regs *regs, s= truct ve_info *ve) switch (ve->exit_reason) { case EXIT_REASON_CPUID: return handle_cpuid(regs, ve); + case EXIT_REASON_EPT_VIOLATION: + if (is_private_gpa(ve->gpa)) + panic("Unexpected EPT-violation on private memory."); + return handle_mmio(regs, ve); default: pr_warn("Unexpected #VE: %lld\n", ve->exit_reason); return -EIO; --=20 2.45.2 From nobody Fri Dec 19 22:01:07 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 050E618A6AC; Tue, 30 Jul 2024 17:36:33 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722360994; cv=none; b=GLDNyWum1tTRSDmnOzIXS108fAUVOf0+EJTuaZ5WVdZz6hNQ+lSFqP5pmqO/QCoWprf8QDu0wYYE4LBtQkyaJJvpDgdwzWJORIcr7maN4X4Wpmq0yE1NTNkVRf94SWxphGT8IBvJ19YonWKnpKm/4e1qaW38JDteBzJWxJxT7+0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722360994; c=relaxed/simple; bh=4gmNvshIhA5Sq/t3xitvxYwN5dATcceEcntvV5Tbdm0=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=MDuYmP7zebPz9R/dxFlXwl3PMksjvJdC5L6RHsTLaSsTTp4sm5RZN7jcjrKDKaTQlW/zRFg6ynLPA+dWorwM7yGrpUh3bPKOmGQgdXnJP3WVlv+7tSXsyxAszdCXYCCZ1A5OA5g2gGd2lwxr++WK4GGNdE41/4LWlwA11pHZCHs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org 
header.b=ojjgZ7Tk; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="ojjgZ7Tk" Received: by smtp.kernel.org (Postfix) with ESMTPSA id EF607C4AF0C; Tue, 30 Jul 2024 17:36:28 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722360993; bh=4gmNvshIhA5Sq/t3xitvxYwN5dATcceEcntvV5Tbdm0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=ojjgZ7TkPjiezoupGF7Kfk4AuVW5AZV0nJSD39WvdcqK+zNV67c/usjPygTxHaW93 Hn42dqlghqgOblsw9nUoKCtiYv2CbgQGoG1Xqk+siFlkLU8UhKfIG+ow1F8gLoUpvc Jt1+svcxy7XTJyq3ajx5Grl27CMH1LTJZf77FmUGWO0r/gO1pgxy/5XOb4jslKRd0+ 3U83m0Cfj9uuilhcrZuALH9ij/rBHxfyfXV2l61ws+wzCE9yCKtDwFzofHZcSHD914 o46t2LK7X2JMOSWvKdov37S3ZdRj4fsBlsu27WCBoJN/Cbl95GrS+jKFxgFddhUW8B SDEpK/DrSyb1A== From: "Alexey Gladkov (Intel)" To: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev Cc: Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , "H. Peter Anvin" , "Kirill A. Shutemov" , Andrew Morton , Yuan Yao , Geert Uytterhoeven , Yuntao Wang , Kai Huang , Baoquan He , Oleg Nesterov , Joerg Roedel , Tom Lendacky , cho@microsoft.com, decui@microsoft.com, John.Starks@microsoft.com Subject: [PATCH v1 4/4] x86/tdx: Implement movs for MMIO Date: Tue, 30 Jul 2024 19:35:59 +0200 Message-ID: X-Mailer: git-send-email 2.45.2 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Adapt AMD's implementation of the MOVS instruction. Since the implementations are similar, it is possible to reuse the code. MOVS emulation consists of dividing it into a series of read and write operations, which in turn will be validated separately. Signed-off-by: Alexey Gladkov (Intel) --- I don't really understand the reasoning behind AMD's approach of returning = to userspace after every read/write operation in vc_handle_mmio_movs(). I didn= 't change this so as not to break their implementation. But if this can be changed then the whole vc_handle_mmio_movs() could be us= ed as a common helper. arch/x86/coco/sev/core.c | 133 ++++---------------------------------- arch/x86/coco/tdx/tdx.c | 56 ++++++++++++++-- arch/x86/include/asm/io.h | 3 + arch/x86/lib/iomem.c | 132 +++++++++++++++++++++++++++++++++++++ 4 files changed, 199 insertions(+), 125 deletions(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 082d61d85dfc..3135c89802e9 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -369,72 +369,17 @@ static enum es_result vc_decode_insn(struct es_em_ctx= t *ctxt) static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, char *dst, char *buf, size_t size) { - unsigned long error_code =3D X86_PF_PROT | X86_PF_WRITE; + unsigned long error_code; + int ret =3D __put_iomem(dst, buf, size); =20 - /* - * This function uses __put_user() independent of whether kernel or user - * memory is accessed. This works fine because __put_user() does no - * sanity checks of the pointer being accessed. All that it does is - * to report when the access failed. - * - * Also, this function runs in atomic context, so __put_user() is not - * allowed to sleep. The page-fault handler detects that it is running - * in atomic context and will not try to take mmap_sem and handle the - * fault, so additional pagefault_enable()/disable() calls are not - * needed. 
- * - * The access can't be done via copy_to_user() here because - * vc_write_mem() must not use string instructions to access unsafe - * memory. The reason is that MOVS is emulated by the #VC handler by - * splitting the move up into a read and a write and taking a nested #VC - * exception on whatever of them is the MMIO access. Using string - * instructions here would cause infinite nesting. - */ - switch (size) { - case 1: { - u8 d1; - u8 __user *target =3D (u8 __user *)dst; - - memcpy(&d1, buf, 1); - if (__put_user(d1, target)) - goto fault; - break; - } - case 2: { - u16 d2; - u16 __user *target =3D (u16 __user *)dst; - - memcpy(&d2, buf, 2); - if (__put_user(d2, target)) - goto fault; - break; - } - case 4: { - u32 d4; - u32 __user *target =3D (u32 __user *)dst; - - memcpy(&d4, buf, 4); - if (__put_user(d4, target)) - goto fault; - break; - } - case 8: { - u64 d8; - u64 __user *target =3D (u64 __user *)dst; + if (!ret) + return ES_OK; =20 - memcpy(&d8, buf, 8); - if (__put_user(d8, target)) - goto fault; - break; - } - default: - WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); + if (ret =3D=3D -EIO) return ES_UNSUPPORTED; - } =20 - return ES_OK; + error_code =3D X86_PF_PROT | X86_PF_WRITE; =20 -fault: if (user_mode(ctxt->regs)) error_code |=3D X86_PF_USER; =20 @@ -448,71 +393,17 @@ static enum es_result vc_write_mem(struct es_em_ctxt = *ctxt, static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, char *src, char *buf, size_t size) { - unsigned long error_code =3D X86_PF_PROT; + unsigned long error_code; + int ret =3D __get_iomem(src, buf, size); =20 - /* - * This function uses __get_user() independent of whether kernel or user - * memory is accessed. This works fine because __get_user() does no - * sanity checks of the pointer being accessed. All that it does is - * to report when the access failed. - * - * Also, this function runs in atomic context, so __get_user() is not - * allowed to sleep. The page-fault handler detects that it is running - * in atomic context and will not try to take mmap_sem and handle the - * fault, so additional pagefault_enable()/disable() calls are not - * needed. - * - * The access can't be done via copy_from_user() here because - * vc_read_mem() must not use string instructions to access unsafe - * memory. The reason is that MOVS is emulated by the #VC handler by - * splitting the move up into a read and a write and taking a nested #VC - * exception on whatever of them is the MMIO access. Using string - * instructions here would cause infinite nesting. 
- */ - switch (size) { - case 1: { - u8 d1; - u8 __user *s =3D (u8 __user *)src; - - if (__get_user(d1, s)) - goto fault; - memcpy(buf, &d1, 1); - break; - } - case 2: { - u16 d2; - u16 __user *s =3D (u16 __user *)src; - - if (__get_user(d2, s)) - goto fault; - memcpy(buf, &d2, 2); - break; - } - case 4: { - u32 d4; - u32 __user *s =3D (u32 __user *)src; + if (!ret) + return ES_OK; =20 - if (__get_user(d4, s)) - goto fault; - memcpy(buf, &d4, 4); - break; - } - case 8: { - u64 d8; - u64 __user *s =3D (u64 __user *)src; - if (__get_user(d8, s)) - goto fault; - memcpy(buf, &d8, 8); - break; - } - default: - WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); + if (ret =3D=3D -EIO) return ES_UNSUPPORTED; - } =20 - return ES_OK; + error_code =3D X86_PF_PROT; =20 -fault: if (user_mode(ctxt->regs)) error_code |=3D X86_PF_USER; =20 diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 26b2e52457be..cf209381d63b 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -499,6 +499,53 @@ static int decode_insn_struct(struct insn *insn, struc= t pt_regs *regs) return 0; } =20 +static int handle_mmio_movs(struct insn *insn, struct pt_regs *regs, int s= ize, struct ve_info *ve) +{ + unsigned long ds_base, es_base; + unsigned char *src, *dst; + unsigned char buffer[8]; + int off, ret; + bool rep; + + /* + * The in-kernel code must use a special API that does not use MOVS. + * If the MOVS instruction is received from in-kernel, then something + * is broken. + */ + WARN_ON_ONCE(!user_mode(regs)); + + ds_base =3D insn_get_seg_base(regs, INAT_SEG_REG_DS); + es_base =3D insn_get_seg_base(regs, INAT_SEG_REG_ES); + + if (ds_base =3D=3D -1L || es_base =3D=3D -1L) + return -EINVAL; + + rep =3D insn_has_rep_prefix(insn); + + do { + src =3D ds_base + (unsigned char *) regs->si; + dst =3D es_base + (unsigned char *) regs->di; + + ret =3D __get_iomem(src, buffer, size); + if (ret) + return ret; + + ret =3D __put_iomem(dst, buffer, size); + if (ret) + return ret; + + off =3D (regs->flags & X86_EFLAGS_DF) ? -size : size; + + regs->si +=3D off; + regs->di +=3D off; + + if (rep) + regs->cx -=3D 1; + } while (rep || regs->cx > 0); + + return insn->length; +} + static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, = int size, struct pt_regs *regs, struct ve_info *ve) { @@ -520,9 +567,8 @@ static int handle_mmio_write(struct insn *insn, enum in= sn_mmio_type mmio, int si return insn->length; case INSN_MMIO_MOVS: /* - * MMIO was accessed with an instruction that could not be - * decoded or handled properly. It was likely not using io.h - * helpers or accessed MMIO accidentally. + * MOVS is processed through higher level emulation which breaks + * this instruction into a sequence of reads and writes. 
*/ return -EINVAL; default: @@ -591,6 +637,9 @@ static int handle_mmio(struct pt_regs *regs, struct ve_= info *ve) if (WARN_ON_ONCE(mmio =3D=3D INSN_MMIO_DECODE_FAILED)) return -EINVAL; =20 + if (mmio =3D=3D INSN_MMIO_MOVS) + return handle_mmio_movs(&insn, regs, size, ve); + vaddr =3D (unsigned long)insn_get_addr_ref(&insn, regs); =20 if (user_mode(regs)) { @@ -619,7 +668,6 @@ static int handle_mmio(struct pt_regs *regs, struct ve_= info *ve) switch (mmio) { case INSN_MMIO_WRITE: case INSN_MMIO_WRITE_IMM: - case INSN_MMIO_MOVS: ret =3D handle_mmio_write(&insn, mmio, size, regs, ve); break; case INSN_MMIO_READ: diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 1d60427379c9..ac01d53466cb 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -402,4 +402,7 @@ static inline void iosubmit_cmds512(void __iomem *dst, = const void *src, } } =20 +int __get_iomem(char *src, char *buf, size_t size); +int __put_iomem(char *src, char *buf, size_t size); + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c index 5eecb45d05d5..e4919ad22206 100644 --- a/arch/x86/lib/iomem.c +++ b/arch/x86/lib/iomem.c @@ -2,6 +2,7 @@ #include #include #include +#include =20 #define movs(type,to,from) \ asm volatile("movs" type:"=3D&D" (to), "=3D&S" (from):"0" (to), "1" (from= ):"memory") @@ -124,3 +125,134 @@ void memset_io(volatile void __iomem *a, int b, size_= t c) } } EXPORT_SYMBOL(memset_io); + +int __get_iomem(char *src, char *buf, size_t size) +{ + /* + * This function uses __get_user() independent of whether kernel or user + * memory is accessed. This works fine because __get_user() does no + * sanity checks of the pointer being accessed. All that it does is + * to report when the access failed. + * + * Also, this function runs in atomic context, so __get_user() is not + * allowed to sleep. The page-fault handler detects that it is running + * in atomic context and will not try to take mmap_sem and handle the + * fault, so additional pagefault_enable()/disable() calls are not + * needed. + * + * The access can't be done via copy_from_user() here because + * mmio_read_mem() must not use string instructions to access unsafe + * memory. The reason is that MOVS is emulated by the #VC handler by + * splitting the move up into a read and a write and taking a nested #VC + * exception on whatever of them is the MMIO access. Using string + * instructions here would cause infinite nesting. + */ + switch (size) { + case 1: { + u8 d1; + u8 __user *s =3D (u8 __user *)src; + + if (__get_user(d1, s)) + return -EFAULT; + memcpy(buf, &d1, 1); + break; + } + case 2: { + u16 d2; + u16 __user *s =3D (u16 __user *)src; + + if (__get_user(d2, s)) + return -EFAULT; + memcpy(buf, &d2, 2); + break; + } + case 4: { + u32 d4; + u32 __user *s =3D (u32 __user *)src; + + if (__get_user(d4, s)) + return -EFAULT; + memcpy(buf, &d4, 4); + break; + } + case 8: { + u64 d8; + u64 __user *s =3D (u64 __user *)src; + if (__get_user(d8, s)) + return -EFAULT; + memcpy(buf, &d8, 8); + break; + } + default: + WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); + return -EIO; + } + + return 0; +} + +int __put_iomem(char *dst, char *buf, size_t size) +{ + /* + * This function uses __put_user() independent of whether kernel or user + * memory is accessed. This works fine because __put_user() does no + * sanity checks of the pointer being accessed. All that it does is + * to report when the access failed. 
+ * + * Also, this function runs in atomic context, so __put_user() is not + * allowed to sleep. The page-fault handler detects that it is running + * in atomic context and will not try to take mmap_sem and handle the + * fault, so additional pagefault_enable()/disable() calls are not + * needed. + * + * The access can't be done via copy_to_user() here because + * put_iomem() must not use string instructions to access unsafe + * memory. The reason is that MOVS is emulated by the #VC handler by + * splitting the move up into a read and a write and taking a nested #VC + * exception on whatever of them is the MMIO access. Using string + * instructions here would cause infinite nesting. + */ + switch (size) { + case 1: { + u8 d1; + u8 __user *target =3D (u8 __user *)dst; + + memcpy(&d1, buf, 1); + if (__put_user(d1, target)) + return -EFAULT; + break; + } + case 2: { + u16 d2; + u16 __user *target =3D (u16 __user *)dst; + + memcpy(&d2, buf, 2); + if (__put_user(d2, target)) + return -EFAULT; + break; + } + case 4: { + u32 d4; + u32 __user *target =3D (u32 __user *)dst; + + memcpy(&d4, buf, 4); + if (__put_user(d4, target)) + return -EFAULT; + break; + } + case 8: { + u64 d8; + u64 __user *target =3D (u64 __user *)dst; + + memcpy(&d8, buf, 8); + if (__put_user(d8, target)) + return -EFAULT; + break; + } + default: + WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); + return -EIO; + } + + return 0; +} --=20 2.45.2
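As a closing note, the MOVS handling in this patch boils down to the following shape (a simplified sketch of handle_mmio_movs(); segment-base lookup, the direction-flag computation of the offset and the error paths are as in the diff above):

	/* Each iteration is one ordinary read plus one ordinary write, so
	 * whichever side is the MMIO access is validated and emulated on
	 * its own, exactly like a non-string MMIO instruction would be. */
	do {
		if (__get_iomem(src, buffer, size) || __put_iomem(dst, buffer, size))
			break;

		regs->si += off;	/* off is +size or -size depending on EFLAGS.DF */
		regs->di += off;
		src += off;
		dst += off;

		if (rep)
			regs->cx -= 1;
	} while (rep && regs->cx > 0);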