[PATCH v9 07/13] um: nommu: configure fs register on host syscall invocation

Hajime Tazaki posted 13 patches 7 months, 3 weeks ago
There is a newer version of this series
[PATCH v9 07/13] um: nommu: configure fs register on host syscall invocation
Posted by Hajime Tazaki 7 months, 3 weeks ago
Userspace on UML/!MMU also needs to configure the %fs register while it is
running in order to access its thread structure correctly, so host syscalls
implemented in the os-Linux drivers may be confused when they are called.
Thus the %fs register has to be configured via arch_prctl(SET_FS) on every
host syscall.

Signed-off-by: Hajime Tazaki <thehajime@gmail.com>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
---
 arch/um/include/shared/os.h       |  6 +++
 arch/um/os-Linux/process.c        |  6 +++
 arch/um/os-Linux/start_up.c       | 21 +++++++++
 arch/x86/um/nommu/do_syscall_64.c | 37 ++++++++++++++++
 arch/x86/um/nommu/syscalls_64.c   | 71 +++++++++++++++++++++++++++++++
 5 files changed, 141 insertions(+)

diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 1251f08e26d0..7c6a8bc0447c 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -189,6 +189,7 @@ extern void check_host_supports_tls(int *supports_tls, int *tls_min);
 extern void get_host_cpu_features(
 	void (*flags_helper_func)(char *line),
 	void (*cache_helper_func)(char *line));
+extern int host_has_fsgsbase;
 
 /* mem.c */
 extern int create_mem_file(unsigned long long len);
@@ -213,6 +214,11 @@ extern int os_protect_memory(void *addr, unsigned long len,
 extern int os_unmap_memory(void *addr, int len);
 extern int os_drop_memory(void *addr, int length);
 extern int can_drop_memory(void);
+extern int os_arch_prctl(int pid, int option, unsigned long *arg);
+#ifndef CONFIG_MMU
+extern long long host_fs;
+#endif
+
 
 void os_set_pdeathsig(void);
 
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 8a1ab59a089f..3a6d34ccd12b 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -16,6 +16,7 @@
 #include <sys/prctl.h>
 #include <sys/wait.h>
 #include <asm/unistd.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
 #include <linux/threads.h>
 #include <init.h>
 #include <longjmp.h>
@@ -178,6 +179,11 @@ int __init can_drop_memory(void)
 	return ok;
 }
 
+int os_arch_prctl(int pid, int option, unsigned long *arg2)
+{
+	return syscall(SYS_arch_prctl, option, arg2);
+}
+
 void init_new_thread_signals(void)
 {
 	set_handler(SIGSEGV);
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 4e1f05360c49..55dd92bd2a0b 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -20,6 +20,8 @@
 #include <sys/resource.h>
 #include <asm/ldt.h>
 #include <asm/unistd.h>
+#include <sys/auxv.h>
+#include <asm/hwcap2.h>
 #include <init.h>
 #include <os.h>
 #include <kern_util.h>
@@ -36,6 +38,8 @@
 #include <skas.h>
 #include "internal.h"
 
+int host_has_fsgsbase;
+
 static void ptrace_child(void)
 {
 	int ret;
@@ -459,6 +463,20 @@ __uml_setup("seccomp=", uml_seccomp_config,
 "    This is insecure and should only be used with a trusted userspace\n\n"
 );
 
+static void __init check_fsgsbase(void)
+{
+	unsigned long auxv = getauxval(AT_HWCAP2);
+
+	os_info("Checking FSGSBASE instructions...");
+	if (auxv & HWCAP2_FSGSBASE) {
+		host_has_fsgsbase = 1;
+		os_info("OK\n");
+	} else {
+		host_has_fsgsbase = 0;
+		os_info("disabled\n");
+	}
+}
+
 void __init os_early_checks(void)
 {
 	int pid;
@@ -484,6 +502,9 @@ void __init os_early_checks(void)
 	using_seccomp = 0;
 	check_ptrace();
 
+	/* probe fsgsbase instruction */
+	check_fsgsbase();
+
 	pid = start_ptraced_child();
 	if (init_pid_registers(pid))
 		fatal("Failed to initialize default registers");
diff --git a/arch/x86/um/nommu/do_syscall_64.c b/arch/x86/um/nommu/do_syscall_64.c
index 5d0fa83e7fdc..796beb0089fc 100644
--- a/arch/x86/um/nommu/do_syscall_64.c
+++ b/arch/x86/um/nommu/do_syscall_64.c
@@ -2,10 +2,38 @@
 
 #include <linux/kernel.h>
 #include <linux/ptrace.h>
+#include <asm/fsgsbase.h>
+#include <asm/prctl.h>
 #include <kern_util.h>
 #include <sysdep/syscalls.h>
 #include <os.h>
 
+static int os_x86_arch_prctl(int pid, int option, unsigned long *arg2)
+{
+	if (!host_has_fsgsbase)
+		return os_arch_prctl(pid, option, arg2);
+
+	switch (option) {
+	case ARCH_SET_FS:
+		wrfsbase(*arg2);
+		break;
+	case ARCH_SET_GS:
+		wrgsbase(*arg2);
+		break;
+	case ARCH_GET_FS:
+		*arg2 = rdfsbase();
+		break;
+	case ARCH_GET_GS:
+		*arg2 = rdgsbase();
+		break;
+	default:
+		pr_warn("%s: unsupported option: 0x%x", __func__, option);
+		break;
+	}
+
+	return 0;
+}
+
 __visible void do_syscall_64(struct pt_regs *regs)
 {
 	int syscall;
@@ -17,6 +45,9 @@ __visible void do_syscall_64(struct pt_regs *regs)
 		 syscall, (unsigned long)current,
 		 (unsigned long)sys_call_table[syscall]);
 
+	/* set fs register to the original host one */
+	os_x86_arch_prctl(0, ARCH_SET_FS, (void *)host_fs);
+
 	if (likely(syscall < NR_syscalls)) {
 		PT_REGS_SET_SYSCALL_RETURN(regs,
 				EXECUTE_SYSCALL(syscall, regs));
@@ -34,4 +65,10 @@ __visible void do_syscall_64(struct pt_regs *regs)
 	/* force do_signal() --> is_syscall() */
 	set_thread_flag(TIF_SIGPENDING);
 	interrupt_end();
+
+	/* restore back fs register to userspace configured one */
+	os_x86_arch_prctl(0, ARCH_SET_FS,
+		      (void *)(current->thread.regs.regs.gp[FS_BASE
+						     / sizeof(unsigned long)]));
+
 }
diff --git a/arch/x86/um/nommu/syscalls_64.c b/arch/x86/um/nommu/syscalls_64.c
index c78c442aed1d..5bb6d55b4bb5 100644
--- a/arch/x86/um/nommu/syscalls_64.c
+++ b/arch/x86/um/nommu/syscalls_64.c
@@ -13,8 +13,70 @@
 #include <asm/prctl.h> /* XXX This should get the constants from libc */
 #include <registers.h>
 #include <os.h>
+#include <asm/thread_info.h>
+#include <asm/mman.h>
 #include "syscalls.h"
 
+/*
+ * The guest libc can change FS, which confuses the host libc.
+ * In fact, changing FS directly is not supported (check
+ * man arch_prctl). So, whenever we make a host syscall,
+ * we should be changing FS to the original FS (not the
+ * one set by the guest libc). This original FS is stored
+ * in host_fs.
+ */
+long long host_fs = -1;
+
+long arch_prctl(struct task_struct *task, int option,
+		unsigned long __user *arg2)
+{
+	long ret = -EINVAL;
+	unsigned long *ptr = arg2, tmp;
+
+	switch (option) {
+	case ARCH_SET_FS:
+		if (host_fs == -1)
+			os_arch_prctl(0, ARCH_GET_FS, (void *)&host_fs);
+		ret = 0;
+		break;
+	case ARCH_SET_GS:
+		ret = 0;
+		break;
+	case ARCH_GET_FS:
+	case ARCH_GET_GS:
+		ptr = &tmp;
+		break;
+	}
+
+	ret = os_arch_prctl(0, option, ptr);
+	if (ret)
+		return ret;
+
+	switch (option) {
+	case ARCH_SET_FS:
+		current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] =
+			(unsigned long) arg2;
+		break;
+	case ARCH_SET_GS:
+		current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)] =
+			(unsigned long) arg2;
+		break;
+	case ARCH_GET_FS:
+		ret = put_user(current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)], arg2);
+		break;
+	case ARCH_GET_GS:
+		ret = put_user(current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)], arg2);
+		break;
+	}
+
+	return ret;
+}
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+	return arch_prctl(current, option, (unsigned long __user *) arg2);
+}
+
 void arch_switch_to(struct task_struct *to)
 {
 	/*
@@ -42,3 +104,12 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
 
 	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
 }
+
+static int __init um_nommu_setup_hostfs(void)
+{
+	/* initialize the host_fs value at boottime */
+	os_arch_prctl(0, ARCH_GET_FS, (void *)&host_fs);
+
+	return 0;
+}
+arch_initcall(um_nommu_setup_hostfs);
-- 
2.43.0
Re: [PATCH v9 07/13] um: nommu: configure fs register on host syscall invocation
Posted by Benjamin Berg 7 months, 3 weeks ago
On Thu, 2025-06-19 at 10:04 +0900, Hajime Tazaki wrote:
> As userspace on UML/!MMU also need to configure %fs register when it is
> running to correctly access thread structure, host syscalls implemented
> in os-Linux drivers may be puzzled when they are called.  Thus it has to
> configure %fs register via arch_prctl(SET_FS) on every host syscalls.

Really, I still think that we should "just" get rid of libc entirely
inside UML. That would avoid so many weird/potential issues …

That doesn't change the fact that FS/GS need to be restored when doing
thread switches and such, though one might then be able to do it entirely
within arch_switch_to.

Benjamin


> 
> Signed-off-by: Hajime Tazaki <thehajime@gmail.com>
> Signed-off-by: Ricardo Koller <ricarkol@google.com>
> ---
>  arch/um/include/shared/os.h       |  6 +++
>  arch/um/os-Linux/process.c        |  6 +++
>  arch/um/os-Linux/start_up.c       | 21 +++++++++
>  arch/x86/um/nommu/do_syscall_64.c | 37 ++++++++++++++++
>  arch/x86/um/nommu/syscalls_64.c   | 71 +++++++++++++++++++++++++++++++
>  5 files changed, 141 insertions(+)
> 
> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
> index 1251f08e26d0..7c6a8bc0447c 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -189,6 +189,7 @@ extern void check_host_supports_tls(int *supports_tls, int *tls_min);
>  extern void get_host_cpu_features(
>  	void (*flags_helper_func)(char *line),
>  	void (*cache_helper_func)(char *line));
> +extern int host_has_fsgsbase;
>  
>  /* mem.c */
>  extern int create_mem_file(unsigned long long len);
> @@ -213,6 +214,11 @@ extern int os_protect_memory(void *addr, unsigned long len,
>  extern int os_unmap_memory(void *addr, int len);
>  extern int os_drop_memory(void *addr, int length);
>  extern int can_drop_memory(void);
> +extern int os_arch_prctl(int pid, int option, unsigned long *arg);
> +#ifndef CONFIG_MMU
> +extern long long host_fs;
> +#endif
> +
>  
>  void os_set_pdeathsig(void);
>  
> diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
> index 8a1ab59a089f..3a6d34ccd12b 100644
> --- a/arch/um/os-Linux/process.c
> +++ b/arch/um/os-Linux/process.c
> @@ -16,6 +16,7 @@
>  #include <sys/prctl.h>
>  #include <sys/wait.h>
>  #include <asm/unistd.h>
> +#include <sys/syscall.h>   /* For SYS_xxx definitions */
>  #include <linux/threads.h>
>  #include <init.h>
>  #include <longjmp.h>
> @@ -178,6 +179,11 @@ int __init can_drop_memory(void)
>  	return ok;
>  }
>  
> +int os_arch_prctl(int pid, int option, unsigned long *arg2)
> +{
> +	return syscall(SYS_arch_prctl, option, arg2);
> +}
> +
>  void init_new_thread_signals(void)
>  {
>  	set_handler(SIGSEGV);
> diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
> index 4e1f05360c49..55dd92bd2a0b 100644
> --- a/arch/um/os-Linux/start_up.c
> +++ b/arch/um/os-Linux/start_up.c
> @@ -20,6 +20,8 @@
>  #include <sys/resource.h>
>  #include <asm/ldt.h>
>  #include <asm/unistd.h>
> +#include <sys/auxv.h>
> +#include <asm/hwcap2.h>
>  #include <init.h>
>  #include <os.h>
>  #include <kern_util.h>
> @@ -36,6 +38,8 @@
>  #include <skas.h>
>  #include "internal.h"
>  
> +int host_has_fsgsbase;
> +
>  static void ptrace_child(void)
>  {
>  	int ret;
> @@ -459,6 +463,20 @@ __uml_setup("seccomp=", uml_seccomp_config,
>  "    This is insecure and should only be used with a trusted userspace\n\n"
>  );
>  
> +static void __init check_fsgsbase(void)
> +{
> +	unsigned long auxv = getauxval(AT_HWCAP2);
> +
> +	os_info("Checking FSGSBASE instructions...");
> +	if (auxv & HWCAP2_FSGSBASE) {
> +		host_has_fsgsbase = 1;
> +		os_info("OK\n");
> +	} else {
> +		host_has_fsgsbase = 0;
> +		os_info("disabled\n");
> +	}
> +}
> +
>  void __init os_early_checks(void)
>  {
>  	int pid;
> @@ -484,6 +502,9 @@ void __init os_early_checks(void)
>  	using_seccomp = 0;
>  	check_ptrace();
>  
> +	/* probe fsgsbase instruction */
> +	check_fsgsbase();
> +
>  	pid = start_ptraced_child();
>  	if (init_pid_registers(pid))
>  		fatal("Failed to initialize default registers");
> diff --git a/arch/x86/um/nommu/do_syscall_64.c b/arch/x86/um/nommu/do_syscall_64.c
> index 5d0fa83e7fdc..796beb0089fc 100644
> --- a/arch/x86/um/nommu/do_syscall_64.c
> +++ b/arch/x86/um/nommu/do_syscall_64.c
> @@ -2,10 +2,38 @@
>  
>  #include <linux/kernel.h>
>  #include <linux/ptrace.h>
> +#include <asm/fsgsbase.h>
> +#include <asm/prctl.h>
>  #include <kern_util.h>
>  #include <sysdep/syscalls.h>
>  #include <os.h>
>  
> +static int os_x86_arch_prctl(int pid, int option, unsigned long *arg2)
> +{
> +	if (!host_has_fsgsbase)
> +		return os_arch_prctl(pid, option, arg2);
> +
> +	switch (option) {
> +	case ARCH_SET_FS:
> +		wrfsbase(*arg2);
> +		break;
> +	case ARCH_SET_GS:
> +		wrgsbase(*arg2);
> +		break;
> +	case ARCH_GET_FS:
> +		*arg2 = rdfsbase();
> +		break;
> +	case ARCH_GET_GS:
> +		*arg2 = rdgsbase();
> +		break;
> +	default:
> +		pr_warn("%s: unsupported option: 0x%x", __func__, option);
> +		break;
> +	}
> +
> +	return 0;
> +}
> +
>  __visible void do_syscall_64(struct pt_regs *regs)
>  {
>  	int syscall;
> @@ -17,6 +45,9 @@ __visible void do_syscall_64(struct pt_regs *regs)
>  		 syscall, (unsigned long)current,
>  		 (unsigned long)sys_call_table[syscall]);
>  
> +	/* set fs register to the original host one */
> +	os_x86_arch_prctl(0, ARCH_SET_FS, (void *)host_fs);
> +
>  	if (likely(syscall < NR_syscalls)) {
>  		PT_REGS_SET_SYSCALL_RETURN(regs,
>  				EXECUTE_SYSCALL(syscall, regs));
> @@ -34,4 +65,10 @@ __visible void do_syscall_64(struct pt_regs *regs)
>  	/* force do_signal() --> is_syscall() */
>  	set_thread_flag(TIF_SIGPENDING);
>  	interrupt_end();
> +
> +	/* restore back fs register to userspace configured one */
> +	os_x86_arch_prctl(0, ARCH_SET_FS,
> +		      (void *)(current->thread.regs.regs.gp[FS_BASE
> +						     / sizeof(unsigned long)]));
> +
>  }
> diff --git a/arch/x86/um/nommu/syscalls_64.c b/arch/x86/um/nommu/syscalls_64.c
> index c78c442aed1d..5bb6d55b4bb5 100644
> --- a/arch/x86/um/nommu/syscalls_64.c
> +++ b/arch/x86/um/nommu/syscalls_64.c
> @@ -13,8 +13,70 @@
>  #include <asm/prctl.h> /* XXX This should get the constants from libc */
>  #include <registers.h>
>  #include <os.h>
> +#include <asm/thread_info.h>
> +#include <asm/mman.h>
>  #include "syscalls.h"
>  
> +/*
> + * The guest libc can change FS, which confuses the host libc.
> + * In fact, changing FS directly is not supported (check
> + * man arch_prctl). So, whenever we make a host syscall,
> + * we should be changing FS to the original FS (not the
> + * one set by the guest libc). This original FS is stored
> + * in host_fs.
> + */
> +long long host_fs = -1;
> +
> +long arch_prctl(struct task_struct *task, int option,
> +		unsigned long __user *arg2)
> +{
> +	long ret = -EINVAL;
> +	unsigned long *ptr = arg2, tmp;
> +
> +	switch (option) {
> +	case ARCH_SET_FS:
> +		if (host_fs == -1)
> +			os_arch_prctl(0, ARCH_GET_FS, (void *)&host_fs);
> +		ret = 0;
> +		break;
> +	case ARCH_SET_GS:
> +		ret = 0;
> +		break;
> +	case ARCH_GET_FS:
> +	case ARCH_GET_GS:
> +		ptr = &tmp;
> +		break;
> +	}
> +
> +	ret = os_arch_prctl(0, option, ptr);
> +	if (ret)
> +		return ret;
> +
> +	switch (option) {
> +	case ARCH_SET_FS:
> +		current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] =
> +			(unsigned long) arg2;
> +		break;
> +	case ARCH_SET_GS:
> +		current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)] =
> +			(unsigned long) arg2;
> +		break;
> +	case ARCH_GET_FS:
> +		ret = put_user(current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)], arg2);
> +		break;
> +	case ARCH_GET_GS:
> +		ret = put_user(current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)], arg2);
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
> +{
> +	return arch_prctl(current, option, (unsigned long __user *) arg2);
> +}
> +
>  void arch_switch_to(struct task_struct *to)
>  {
>  	/*
> @@ -42,3 +104,12 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
>  
>  	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
>  }
> +
> +static int __init um_nommu_setup_hostfs(void)
> +{
> +	/* initialize the host_fs value at boottime */
> +	os_arch_prctl(0, ARCH_GET_FS, (void *)&host_fs);
> +
> +	return 0;
> +}
> +arch_initcall(um_nommu_setup_hostfs);
Re: [PATCH v9 07/13] um: nommu: configure fs register on host syscall invocation
Posted by Hajime Tazaki 7 months, 3 weeks ago
On Thu, 19 Jun 2025 19:40:49 +0900,
Benjamin Berg wrote:
> 
> On Thu, 2025-06-19 at 10:04 +0900, Hajime Tazaki wrote:
> > As userspace on UML/!MMU also need to configure %fs register when it is
> > running to correctly access thread structure, host syscalls implemented
> > in os-Linux drivers may be puzzled when they are called.  Thus it has to
> > configure %fs register via arch_prctl(SET_FS) on every host syscalls.
> 
> Really, I still think that we should "just" get rid of libc entirely
> inside UML. That would avoid so many weird/potential issues …

I'm not sure if I understand your point.

Q1) What do you mean by 'get rid of libc entirely' here?
    Do you mean that the following code block adds the dependency?
+ int os_arch_prctl(int pid, int option, unsigned long *arg2)
+ {
+ 	return syscall(SYS_arch_prctl, option, arg2);
+ }

I guess this can be replaced with inline assembly instead of using
libc's syscall().  But this is code under os-Linux, where I thought we
are allowed to use host code?

Q2) "That would avoid so many weird/potential issues …"
I'm new to this; what kind of issues have you seen?

> Doesn't change the fact that FS/GS needs to be restored when doing
> thread switches and such. Though one might be able to do it entirely
> within arch_switch_to then.

I believe this is already done in arch_switch_to.  This particular
patch instead handles switching %fs over to the host context.

-- Hajime
Re: [PATCH v9 07/13] um: nommu: configure fs register on host syscall invocation
Posted by Benjamin Berg 7 months, 3 weeks ago
On Thu, 2025-06-19 at 21:22 +0900, Hajime Tazaki wrote:
> 
> On Thu, 19 Jun 2025 19:40:49 +0900,
> Benjamin Berg wrote:
> > 
> > On Thu, 2025-06-19 at 10:04 +0900, Hajime Tazaki wrote:
> > > As userspace on UML/!MMU also need to configure %fs register when
> > > it is
> > > running to correctly access thread structure, host syscalls
> > > implemented
> > > in os-Linux drivers may be puzzled when they are called.  Thus it
> > > has to
> > > configure %fs register via arch_prctl(SET_FS) on every host
> > > syscalls.
> > 
> > Really, I still think that we should "just" get rid of libc
> > entirely
> > inside UML. That would avoid so many weird/potential issues …
> 
> I'm not sure if I understand your point.
> 
> Q1) what do you mean by 'get rid of libc entirely' here ?
>     do you mean the following code block adds the dependency ?
> + int os_arch_prctl(int pid, int option, unsigned long *arg2)
> + {
> + 	return syscall(SYS_arch_prctl, option, arg2);
> + }
> 
> I guess this can be replaced with inline assembly instead of using
> libc's one.  but this is the code under os-Linux, which I thought
> we're
> allowed to use the host code ?
> 
> Q2) "That would avoid so many weird/potential issues …"
> I'm new to this;  I'm wondering what kind of issues did you see ?

Oh, I am just annoyed by libc in general in UM. It isn't specific
to this patchset.

An example is that we need to keep malloc() working for libc, which I
think is kind of weird. We also had issues because libc turned on
rseq and that was inherited into userspace, causing random crashes and
such.

> > Doesn't change the fact that FS/GS needs to be restored when doing
> > thread switches and such. Though one might be able to do it
> > entirely
> > within arch_switch_to then.
> 
> I believe this is already done in arch_switch_to.  This particular
> patch does the control to the host context.

OK, I need to look at that again a bit. I haven't really wrapped my head
around how everything fits together, so I probably got some stuff
wrong.

Benjamin
Re: [PATCH v9 07/13] um: nommu: configure fs register on host syscall invocation
Posted by Hajime Tazaki 7 months, 3 weeks ago
On Thu, 19 Jun 2025 21:38:47 +0900,
Benjamin Berg wrote:
> 
> On Thu, 2025-06-19 at 21:22 +0900, Hajime Tazaki wrote:
> > 
> > On Thu, 19 Jun 2025 19:40:49 +0900,
> > Benjamin Berg wrote:
> > > 
> > > On Thu, 2025-06-19 at 10:04 +0900, Hajime Tazaki wrote:
> > > > As userspace on UML/!MMU also need to configure %fs register when
> > > > it is
> > > > running to correctly access thread structure, host syscalls
> > > > implemented
> > > > in os-Linux drivers may be puzzled when they are called.  Thus it
> > > > has to
> > > > configure %fs register via arch_prctl(SET_FS) on every host
> > > > syscalls.
> > > 
> > > Really, I still think that we should "just" get rid of libc
> > > entirely
> > > inside UML. That would avoid so many weird/potential issues …
> > 
> > I'm not sure if I understand your point.
> > 
> > Q1) what do you mean by 'get rid of libc entirely' here ?
> >     do you mean the following code block adds the dependency ?
> > + int os_arch_prctl(int pid, int option, unsigned long *arg2)
> > + {
> > + 	return syscall(SYS_arch_prctl, option, arg2);
> > + }
> > 
> > I guess this can be replaced with inline assembly instead of using
> > libc's one.  but this is the code under os-Linux, which I thought
> > we're
> > allowed to use the host code ?
> > 
> > Q2) "That would avoid so many weird/potential issues …"
> > I'm new to this;  I'm wondering what kind of issues did you see ?
> 
> Oh, I am just being annoyed by libc in general in UM. It isn't specific
> to this patchset.
> 
> An example is that we need to keep malloc() working for libc. Which I
> would think is kind of weird. Or we had issues because libc turned on
> rseq and that was inherited into userspace, causing random crashes and
> such.

I understand, thanks for the input.

> > > Doesn't change the fact that FS/GS needs to be restored when doing
> > > thread switches and such. Though one might be able to do it
> > > entirely
> > > within arch_switch_to then.
> > 
> > I believe this is already done in arch_switch_to.  This particular
> > patch does the control to the host context.
> 
> OK, need to look at that again a bit. I haven't really wrapped my mind
> around how everything fits together, so I probably got some stuff
> wrong.

Anyway, thanks for taking the time to look at this.

-- Hajime