[PATCH v9 06/13] x86/um: nommu: process/thread handling

Hajime Tazaki posted 13 patches 3 months, 3 weeks ago
There is a newer version of this series
[PATCH v9 06/13] x86/um: nommu: process/thread handling
Posted by Hajime Tazaki 3 months, 3 weeks ago
Since the ptrace facility isn't used under !MMU of UML, there is a
different code path to invoke processes/threads: no external processes
are used, and some registers (the fs segment register for TLS, etc.)
need to be properly configured on every context switch.

Signals aren't delivered on non-ptrace syscall entry/exit, so we also
need to handle pending signals ourselves.

ptrace-related syscalls are not tested yet, so arch_has_single_step()
is marked as unsupported in the !MMU environment.

Signed-off-by: Hajime Tazaki <thehajime@gmail.com>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
---
 arch/um/include/asm/ptrace-generic.h |  2 +-
 arch/x86/um/Makefile                 |  3 +-
 arch/x86/um/nommu/Makefile           |  2 +-
 arch/x86/um/nommu/entry_64.S         | 22 ++++++++++++++
 arch/x86/um/nommu/syscalls_64.c      | 44 ++++++++++++++++++++++++++++
 5 files changed, 70 insertions(+), 3 deletions(-)
 create mode 100644 arch/x86/um/nommu/syscalls_64.c

diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h
index 4ff844bcb1cd..a9778c9a59a3 100644
--- a/arch/um/include/asm/ptrace-generic.h
+++ b/arch/um/include/asm/ptrace-generic.h
@@ -14,7 +14,7 @@ struct pt_regs {
 	struct uml_pt_regs regs;
 };
 
-#define arch_has_single_step()	(1)
+#define arch_has_single_step()	(IS_ENABLED(CONFIG_MMU))
 
 #define EMPTY_REGS { .regs = EMPTY_UML_PT_REGS }
 
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 227af2a987e2..53c9ebb3c41c 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -27,7 +27,8 @@ subarch-y += ../kernel/sys_ia32.o
 
 else
 
-obj-y += syscalls_64.o vdso/
+obj-y += vdso/
+obj-$(CONFIG_MMU) += syscalls_64.o
 
 subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o \
 	../lib/memmove_64.o ../lib/memset_64.o
diff --git a/arch/x86/um/nommu/Makefile b/arch/x86/um/nommu/Makefile
index ebe47d4836f4..4018d9e0aba0 100644
--- a/arch/x86/um/nommu/Makefile
+++ b/arch/x86/um/nommu/Makefile
@@ -5,4 +5,4 @@ else
 	BITS := 64
 endif
 
-obj-y = do_syscall_$(BITS).o entry_$(BITS).o os-Linux/
+obj-y = do_syscall_$(BITS).o entry_$(BITS).o syscalls_$(BITS).o os-Linux/
diff --git a/arch/x86/um/nommu/entry_64.S b/arch/x86/um/nommu/entry_64.S
index e9bfc7b93c84..950447dfa66b 100644
--- a/arch/x86/um/nommu/entry_64.S
+++ b/arch/x86/um/nommu/entry_64.S
@@ -89,3 +89,25 @@ ENTRY(__kernel_vsyscall)
 	jmp	*%rcx
 
 END(__kernel_vsyscall)
+
+// void userspace(struct uml_pt_regs *regs)
+ENTRY(userspace)
+
+	/* align the stack for x86_64 ABI */
+	and     $-0x10, %rsp
+	/* Handle any immediate reschedules or signals */
+	call	interrupt_end
+
+	movq	current_ptregs, %rsp
+
+	POP_REGS
+
+	addq	$8, %rsp	/* skip orig_ax */
+	popq	%r11		/* pt_regs->ip */
+	addq	$8, %rsp	/* skip cs */
+	addq	$8, %rsp	/* skip flags */
+	popq	%rsp
+
+	jmp	*%r11
+
+END(userspace)
diff --git a/arch/x86/um/nommu/syscalls_64.c b/arch/x86/um/nommu/syscalls_64.c
new file mode 100644
index 000000000000..c78c442aed1d
--- /dev/null
+++ b/arch/x86/um/nommu/syscalls_64.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright 2003 PathScale, Inc.
+ *
+ * Licensed under the GPL
+ */
+
+#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <asm/prctl.h> /* XXX This should get the constants from libc */
+#include <registers.h>
+#include <os.h>
+#include "syscalls.h"
+
+void arch_switch_to(struct task_struct *to)
+{
+	/*
+	 * In !CONFIG_MMU, it doesn't ptrace thus,
+	 * The FS_BASE/GS_BASE registers are saved here.
+	 */
+	current_top_of_stack = task_top_of_stack(to);
+	current_ptregs = (long)task_pt_regs(to);
+
+	if ((to->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] == 0) ||
+	    (to->mm == NULL))
+		return;
+
+	/* this changes the FS on every context switch */
+	arch_prctl(to, ARCH_SET_FS,
+		   (void __user *) to->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)]);
+}
+
+SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, off)
+{
+	if (off & ~PAGE_MASK)
+		return -EINVAL;
+
+	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+}
-- 
2.43.0
Re: [PATCH v9 06/13] x86/um: nommu: process/thread handling
Posted by Benjamin Berg 3 months, 3 weeks ago
Hi,

On Thu, 2025-06-19 at 10:04 +0900, Hajime Tazaki wrote:
> Since ptrace facility isn't used under !MMU of UML, there is different
> code path to invoke processes/threads; there are no external process
> used, and need to properly configure some of registers (fs segment
> register for TLS, etc) on every context switch, etc.
> 
> Signals aren't delivered in non-ptrace syscall entry/leave so, we also
> need to handle pending signal by ourselves.
> 
> ptrace related syscalls are not tested yet so, marked
> arch_has_single_step() unsupported in !MMU environment.
> 
> Signed-off-by: Hajime Tazaki <thehajime@gmail.com>
> Signed-off-by: Ricardo Koller <ricarkol@google.com>
> ---
>  arch/um/include/asm/ptrace-generic.h |  2 +-
>  arch/x86/um/Makefile                 |  3 +-
>  arch/x86/um/nommu/Makefile           |  2 +-
>  arch/x86/um/nommu/entry_64.S         | 22 ++++++++++++++
>  arch/x86/um/nommu/syscalls_64.c      | 44 ++++++++++++++++++++++++++++
>  5 files changed, 70 insertions(+), 3 deletions(-)
>  create mode 100644 arch/x86/um/nommu/syscalls_64.c
> 
> diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h
> index 4ff844bcb1cd..a9778c9a59a3 100644
> --- a/arch/um/include/asm/ptrace-generic.h
> +++ b/arch/um/include/asm/ptrace-generic.h
> @@ -14,7 +14,7 @@ struct pt_regs {
>  	struct uml_pt_regs regs;
>  };
>  
> -#define arch_has_single_step()	(1)
> +#define arch_has_single_step()	(IS_ENABLED(CONFIG_MMU))
>  
>  #define EMPTY_REGS { .regs = EMPTY_UML_PT_REGS }
>  
> diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
> index 227af2a987e2..53c9ebb3c41c 100644
> --- a/arch/x86/um/Makefile
> +++ b/arch/x86/um/Makefile
> @@ -27,7 +27,8 @@ subarch-y += ../kernel/sys_ia32.o
>  
>  else
>  
> -obj-y += syscalls_64.o vdso/
> +obj-y += vdso/
> +obj-$(CONFIG_MMU) += syscalls_64.o
>  
>  subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o \
>  	../lib/memmove_64.o ../lib/memset_64.o
> diff --git a/arch/x86/um/nommu/Makefile b/arch/x86/um/nommu/Makefile
> index ebe47d4836f4..4018d9e0aba0 100644
> --- a/arch/x86/um/nommu/Makefile
> +++ b/arch/x86/um/nommu/Makefile
> @@ -5,4 +5,4 @@ else
>  	BITS := 64
>  endif
>  
> -obj-y = do_syscall_$(BITS).o entry_$(BITS).o os-Linux/
> +obj-y = do_syscall_$(BITS).o entry_$(BITS).o syscalls_$(BITS).o os-Linux/
> diff --git a/arch/x86/um/nommu/entry_64.S b/arch/x86/um/nommu/entry_64.S
> index e9bfc7b93c84..950447dfa66b 100644
> --- a/arch/x86/um/nommu/entry_64.S
> +++ b/arch/x86/um/nommu/entry_64.S
> @@ -89,3 +89,25 @@ ENTRY(__kernel_vsyscall)
>  	jmp	*%rcx
>  
>  END(__kernel_vsyscall)
> +
> +// void userspace(struct uml_pt_regs *regs)
> +ENTRY(userspace)
> +
> +	/* align the stack for x86_64 ABI */
> +	and     $-0x10, %rsp
> +	/* Handle any immediate reschedules or signals */
> +	call	interrupt_end
> +
> +	movq	current_ptregs, %rsp
> +
> +	POP_REGS
> +
> +	addq	$8, %rsp	/* skip orig_ax */
> +	popq	%r11		/* pt_regs->ip */
> +	addq	$8, %rsp	/* skip cs */
> +	addq	$8, %rsp	/* skip flags */
> +	popq	%rsp
> +
> +	jmp	*%r11
> +
> +END(userspace)
> diff --git a/arch/x86/um/nommu/syscalls_64.c b/arch/x86/um/nommu/syscalls_64.c
> new file mode 100644
> index 000000000000..c78c442aed1d
> --- /dev/null
> +++ b/arch/x86/um/nommu/syscalls_64.c
> @@ -0,0 +1,44 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
> + * Copyright 2003 PathScale, Inc.
> + *
> + * Licensed under the GPL
> + */
> +
> +#include <linux/sched.h>
> +#include <linux/sched/mm.h>
> +#include <linux/syscalls.h>
> +#include <linux/uaccess.h>
> +#include <asm/prctl.h> /* XXX This should get the constants from libc */
> +#include <registers.h>
> +#include <os.h>
> +#include "syscalls.h"
> +
> +void arch_switch_to(struct task_struct *to)
> +{
> +	/*
> +	 * In !CONFIG_MMU, it doesn't ptrace thus,
> +	 * The FS_BASE/GS_BASE registers are saved here.
> +	 */
> +	current_top_of_stack = task_top_of_stack(to);
> +	current_ptregs = (long)task_pt_regs(to);
> +
> +	if ((to->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] == 0) ||
> +	    (to->mm == NULL))
> +		return;
> +
> +	/* this changes the FS on every context switch */
> +	arch_prctl(to, ARCH_SET_FS,
> +		   (void __user *) to->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)]);

Hmm,  the comment mentions FS_BASE/GS_BASE, but here you only handle
FS_BASE? Is that intentional?

Benjamin


> +}
> +
> +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
> +		unsigned long, prot, unsigned long, flags,
> +		unsigned long, fd, unsigned long, off)
> +{
> +	if (off & ~PAGE_MASK)
> +		return -EINVAL;
> +
> +	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
> +}
Re: [PATCH v9 06/13] x86/um: nommu: process/thread handling
Posted by Hajime Tazaki 3 months, 3 weeks ago
Hello,

On Thu, 19 Jun 2025 19:36:04 +0900,
Benjamin Berg wrote:

> > +void arch_switch_to(struct task_struct *to)
> > +{
> > +	/*
> > +	 * In !CONFIG_MMU, it doesn't ptrace thus,
> > +	 * The FS_BASE/GS_BASE registers are saved here.
> > +	 */
> > +	current_top_of_stack = task_top_of_stack(to);
> > +	current_ptregs = (long)task_pt_regs(to);
> > +
> > +	if ((to->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] == 0) ||
> > +	    (to->mm == NULL))
> > +		return;
> > +
> > +	/* this changes the FS on every context switch */
> > +	arch_prctl(to, ARCH_SET_FS,
> > +		   (void __user *) to->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)]);
> 
> Hmm,  the comment mentions FS_BASE/GS_BASE, but here you only handle
> FS_BASE? Is that intentional?

thanks for the comment.

My intention was to handle only fs_base: I saw that this register needs
to be restored during the switch, whereas I didn't see that need for
gs_base.  I can update it if I see a real issue without it.

The comment needs to be updated (I guess it was copied and pasted from
the original arch_switch_to comment).

-- Hajime