[PATCH v5 6/6] x86/tdx: Implement MOVS for MMIO

Alexey Gladkov posted 6 patches 1 year, 3 months ago
There is a newer version of this series
[PATCH v5 6/6] x86/tdx: Implement MOVS for MMIO
Posted by Alexey Gladkov 1 year, 3 months ago
From: "Alexey Gladkov (Intel)" <legion@kernel.org>

Add emulation of the MOVS instruction on MMIO regions. MOVS emulation
consists of dividing it into a series of read and write operations,
which in turn will be validated separately.

This implementation is based on the same principle as in SEV. It splits
MOVS into separate read and write operations, which in turn can cause
nested #VEs depending on which of the arguments caused the first #VE.

The difference with the SEV implementation is the execution context. SEV
code is executed in atomic context. Exception handler in TDX is executed
with interrupts enabled. That's why the approach to locking is
different. In TDX, mmap_lock is taken to verify and emulate the
instruction.

Another difference is how the read and write instructions are executed
for MOVS emulation. While in SEV each read/write operation returns to
user space, in TDX these operations are performed from the kernel
context.

It may be possible to achieve more code reuse at this point,
but it would require confirmation from SEV that such a thing wouldn't
break anything.

Signed-off-by: Alexey Gladkov (Intel) <legion@kernel.org>
---
 arch/x86/coco/tdx/tdx.c          | 82 ++++++++++++++++++++++++++++----
 arch/x86/include/asm/processor.h |  3 ++
 2 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 65f65015238a..a9b3c6dee9ad 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -518,6 +518,60 @@ static int decode_insn_struct(struct insn *insn, struct pt_regs *regs)
 	return 0;
 }
 
+static int handle_mmio_movs(struct insn *insn, struct pt_regs *regs, int size, struct ve_info *ve)
+{
+	unsigned long ds_base, es_base;
+	unsigned char *src, *dst;
+	unsigned char buffer[8];
+	int off, ret;
+	bool rep;
+
+	/*
+	 * The in-kernel code must use a special API that does not use MOVS.
+	 * If the MOVS instruction is received from in-kernel, then something
+	 * is broken.
+	 */
+	if (WARN_ON_ONCE(!user_mode(regs)))
+		return -EFAULT;
+
+	ds_base = insn_get_seg_base(regs, INAT_SEG_REG_DS);
+	es_base = insn_get_seg_base(regs, INAT_SEG_REG_ES);
+
+	if (ds_base == -1L || es_base == -1L)
+		return -EINVAL;
+
+	current->thread.in_mmio_emul = 1;
+
+	rep = insn_has_rep_prefix(insn);
+
+	do {
+		src = ds_base + (unsigned char *) regs->si;
+		dst = es_base + (unsigned char *) regs->di;
+
+		ret = __get_iomem(src, buffer, size);
+		if (ret)
+			goto out;
+
+		ret = __put_iomem(dst, buffer, size);
+		if (ret)
+			goto out;
+
+		off = (regs->flags & X86_EFLAGS_DF) ? -size : size;
+
+		regs->si += off;
+		regs->di += off;
+
+		if (rep)
+			regs->cx -= 1;
+	} while (rep || regs->cx > 0);
+
+	ret = insn->length;
+out:
+	current->thread.in_mmio_emul = 0;
+
+	return ret;
+}
+
 static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, int size,
 			     struct pt_regs *regs, struct ve_info *ve)
 {
@@ -539,9 +593,8 @@ static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, int si
 		return insn->length;
 	case INSN_MMIO_MOVS:
 		/*
-		 * MMIO was accessed with an instruction that could not be
-		 * decoded or handled properly. It was likely not using io.h
-		 * helpers or accessed MMIO accidentally.
+		 * MOVS is processed through higher level emulation which breaks
+		 * this instruction into a sequence of reads and writes.
 		 */
 		return -EINVAL;
 	default:
@@ -600,6 +653,7 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
 {
 	enum insn_mmio_type mmio;
 	struct insn insn = {};
+	int need_validation;
 	unsigned long vaddr;
 	int size, ret;
 
@@ -611,14 +665,27 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
 	if (WARN_ON_ONCE(mmio == INSN_MMIO_DECODE_FAILED))
 		return -EINVAL;
 
+	if (mmio == INSN_MMIO_MOVS)
+		return handle_mmio_movs(&insn, regs, size, ve);
+
+	need_validation = user_mode(regs);
+
 	if (!user_mode(regs) && !is_kernel_addr(ve->gla)) {
-		WARN_ONCE(1, "Access to userspace address is not supported");
-		return -EINVAL;
+		/*
+		 * Access from kernel to userspace addresses is not allowed
+		 * unless it is a nested exception during MOVS emulation.
+		 */
+		if (!current->thread.in_mmio_emul || !current->mm) {
+			WARN_ONCE(1, "Access to userspace address is not supported");
+			return -EINVAL;
+		}
+
+		need_validation = 1;
 	}
 
 	vaddr = (unsigned long)insn_get_addr_ref(&insn, regs);
 
-	if (user_mode(regs)) {
+	if (need_validation) {
 		if (mmap_read_lock_killable(current->mm))
 			return -EINTR;
 
@@ -644,7 +711,6 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
 	switch (mmio) {
 	case INSN_MMIO_WRITE:
 	case INSN_MMIO_WRITE_IMM:
-	case INSN_MMIO_MOVS:
 		ret = handle_mmio_write(&insn, mmio, size, regs, ve);
 		break;
 	case INSN_MMIO_READ:
@@ -665,7 +731,7 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
 		ret = -EINVAL;
 	}
 unlock:
-	if (user_mode(regs))
+	if (need_validation)
 		mmap_read_unlock(current->mm);
 
 	return ret;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a75a07f4931f..33875a217ed8 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -486,6 +486,9 @@ struct thread_struct {
 	unsigned long		iopl_emul;
 
 	unsigned int		iopl_warn:1;
+#ifdef CONFIG_INTEL_TDX_GUEST
+	unsigned int		in_mmio_emul:1;
+#endif
 
 	/*
 	 * Protection Keys Register for Userspace.  Loaded immediately on
-- 
2.46.0
Re: [PATCH v5 6/6] x86/tdx: Implement MOVS for MMIO
Posted by Kirill A. Shutemov 1 year, 3 months ago
On Wed, Aug 28, 2024 at 12:44:36PM +0200, Alexey Gladkov wrote:
> From: "Alexey Gladkov (Intel)" <legion@kernel.org>
> 
> Add emulation of the MOVS instruction on MMIO regions. MOVS emulation
> consists of dividing it into a series of read and write operations,
> which in turn will be validated separately.
> 
> This implementation is based on the same principle as in SEV. It splits
> MOVS into separate read and write operations, which in turn can cause
> nested #VEs depending on which of the arguments caused the first #VE.
> 
> The difference with the SEV implementation is the execution context. SEV
> code is executed in atomic context. Exception handler in TDX is executed
> with interrupts enabled. That's why the approach to locking is
> different. In TDX, mmap_lock is taken to verify and emulate the
> instruction.
> 
> Another difference is how the read and write instructions are executed
> for MOVS emulation. While in SEV each read/write operation returns to
> user space, in TDX these operations are performed from the kernel
> context.

It looks like SEV only returns to userspace to retry the instruction after
stepping on failed __get_user()/__put_user(), unrolling back to
vc_raw_handle_exception() and handling page fault there.

But I'm not sure what happens with #VC inside vc_read_mem() and
vc_write_mem(). Can the #VC exception be nested? Tom?

-- 
  Kiryl Shutsemau / Kirill A. Shutemov
Re: [PATCH v5 6/6] x86/tdx: Implement MOVS for MMIO
Posted by Alexey Gladkov 1 year, 3 months ago
On Thu, Aug 29, 2024 at 03:44:55PM +0300, Kirill A. Shutemov wrote:
> On Wed, Aug 28, 2024 at 12:44:36PM +0200, Alexey Gladkov wrote:
> > From: "Alexey Gladkov (Intel)" <legion@kernel.org>
> > 
> > Add emulation of the MOVS instruction on MMIO regions. MOVS emulation
> > consists of dividing it into a series of read and write operations,
> > which in turn will be validated separately.
> > 
> > This implementation is based on the same principle as in SEV. It splits
> > MOVS into separate read and write operations, which in turn can cause
> > nested #VEs depending on which of the arguments caused the first #VE.
> > 
> > The difference with the SEV implementation is the execution context. SEV
> > code is executed in atomic context. Exception handler in TDX is executed
> > with interrupts enabled. That's why the approach to locking is
> > different. In TDX, mmap_lock is taken to verify and emulate the
> > instruction.
> > 
> > Another difference is how the read and write instructions are executed
> > for MOVS emulation. While in SEV each read/write operation returns to
> > user space, in TDX these operations are performed from the kernel
> > context.
> 
> It looks like SEV only returns to userspace to retry the instruction after
> stepping on failed __get_user()/__put_user(), unrolling back to
> vc_raw_handle_exception() and handling page fault there.

In vc_handle_mmio_movs() if regs->cx is not zero we return ES_RETRY. The
vc_handle_mmio(), vc_handle_exitcode() return it as is. In
vc_raw_handle_exception() if vc_handle_exitcode() returns ES_RETRY then we
just return true. So, the ES_RETRY is not further visible.

Or am I missing something?

> 
> But I'm not sure what happens with #VC inside vc_read_mem() and
> vc_write_mem(). Can the #VC exception be nested? Tom?
> 
> -- 
>   Kiryl Shutsemau / Kirill A. Shutemov
> 

-- 
Rgrds, legion
Re: [PATCH v5 6/6] x86/tdx: Implement MOVS for MMIO
Posted by Kirill A. Shutemov 1 year, 3 months ago
On Thu, Aug 29, 2024 at 08:40:56PM +0200, Alexey Gladkov wrote:
> On Thu, Aug 29, 2024 at 03:44:55PM +0300, Kirill A. Shutemov wrote:
> > On Wed, Aug 28, 2024 at 12:44:36PM +0200, Alexey Gladkov wrote:
> > > From: "Alexey Gladkov (Intel)" <legion@kernel.org>
> > > 
> > > Add emulation of the MOVS instruction on MMIO regions. MOVS emulation
> > > consists of dividing it into a series of read and write operations,
> > > which in turn will be validated separately.
> > > 
> > > This implementation is based on the same principle as in SEV. It splits
> > > MOVS into separate read and write operations, which in turn can cause
> > > nested #VEs depending on which of the arguments caused the first #VE.
> > > 
> > > The difference with the SEV implementation is the execution context. SEV
> > > code is executed in atomic context. Exception handler in TDX is executed
> > > with interrupts enabled. That's why the approach to locking is
> > > different. In TDX, mmap_lock is taken to verify and emulate the
> > > instruction.
> > > 
> > > Another difference is how the read and write instructions are executed
> > > for MOVS emulation. While in SEV each read/write operation returns to
> > > user space, in TDX these operations are performed from the kernel
> > > context.
> > 
> > It looks like SEV only returns to userspace to retry the instruction after
> > stepping on failed __get_user()/__put_user(), unrolling back to
> > vc_raw_handle_exception() and handling page fault there.
> 
> In vc_handle_mmio_movs() if regs->cx is not zero we return ES_RETRY. The
> vc_handle_mmio(), vc_handle_exitcode() return it as is. In
> vc_raw_handle_exception() if vc_handle_exitcode() returns ES_RETRY then we
> just return true. So, the ES_RETRY is not further visible.
> 
> Or am I missing something?

You are right. I didn't see this codepath.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov