linux-user/aarch64/signal.c | 5 +++ linux-user/alpha/signal.c | 6 +++ linux-user/arm/signal.c | 5 +++ linux-user/hexagon/signal.c | 5 +++ linux-user/hppa/signal.c | 5 +++ linux-user/i386/signal.c | 10 +++++ linux-user/loongarch64/signal.c | 5 +++ linux-user/m68k/signal.c | 5 +++ linux-user/microblaze/signal.c | 5 +++ linux-user/mips/signal.c | 10 +++++ linux-user/openrisc/signal.c | 5 +++ linux-user/ppc/signal.c | 10 +++++ linux-user/riscv/signal.c | 5 +++ linux-user/s390x/signal.c | 5 +++ linux-user/sh4/signal.c | 5 +++ linux-user/sparc/signal.c | 10 +++++ linux-user/xtensa/signal.c | 5 +++ linux-user/main.c | 2 + linux-user/qemu.h | 5 +++ linux-user/signal-common.h | 1 + linux-user/syscall.c | 74 ++++++++++++++++++++++++++++++++- linux-user/syscall_defs.h | 6 +++ 22 files changed, 193 insertions(+), 1 deletion(-)
This commit adds support for the `prctl(PR_SET_SYSCALL_USER_DISPATCH)`
operation (Syscall User Dispatch, SUD) to the Linux user-mode emulator.
It is implemented entirely in host-independent code, by forcing
a SIGSYS early during syscall handling when the PC is outside the
always-allowed range.
Since disabled SUD is indistinguishable from enabled SUD whose
always-allowed region has length ~0, the disabled state is encoded as
length == ~0 instead of introducing a separate flag.
Tested with [uglendix][1]; this will probably also benefit software such as
tiny-wine, rpcsx, limbo, lazypoline, vicar, sysfail and endokernel,
to name a few.
[1]: https://sr.ht/~arusekk/uglendix
Signed-off-by: Arusekk <floss@arusekk.pl>
---
Changes from v2: brace style, use vaddr for TaskState->sys_dispatch[_selector],
factor out do_prctl_syscall_user_dispatch,
use access_ok instead of a dummy get_u8, improve the hot path, reword.
The files are not in alphabetical order: architecture-specific
changes are moved to the top for easier review.
v2: https://lore.kernel.org/qemu-devel/20250604121437.28176-4-floss@arusekk.pl/
linux-user/aarch64/signal.c | 5 +++
linux-user/alpha/signal.c | 6 +++
linux-user/arm/signal.c | 5 +++
linux-user/hexagon/signal.c | 5 +++
linux-user/hppa/signal.c | 5 +++
linux-user/i386/signal.c | 10 +++++
linux-user/loongarch64/signal.c | 5 +++
linux-user/m68k/signal.c | 5 +++
linux-user/microblaze/signal.c | 5 +++
linux-user/mips/signal.c | 10 +++++
linux-user/openrisc/signal.c | 5 +++
linux-user/ppc/signal.c | 10 +++++
linux-user/riscv/signal.c | 5 +++
linux-user/s390x/signal.c | 5 +++
linux-user/sh4/signal.c | 5 +++
linux-user/sparc/signal.c | 10 +++++
linux-user/xtensa/signal.c | 5 +++
linux-user/main.c | 2 +
linux-user/qemu.h | 5 +++
linux-user/signal-common.h | 1 +
linux-user/syscall.c | 74 ++++++++++++++++++++++++++++++++-
linux-user/syscall_defs.h | 6 +++
22 files changed, 193 insertions(+), 1 deletion(-)
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index d50cab78d8..da2c039544 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -746,3 +746,8 @@ void setup_sigtramp(abi_ulong sigtramp_page)
default_rt_sigreturn = sigtramp_page;
unlock_user(tramp, sigtramp_page, 8);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return pc == default_rt_sigreturn + 8;
+}
diff --git a/linux-user/alpha/signal.c b/linux-user/alpha/signal.c
index 896c2c148a..48d6e4f790 100644
--- a/linux-user/alpha/signal.c
+++ b/linux-user/alpha/signal.c
@@ -275,3 +275,9 @@ void setup_sigtramp(abi_ulong sigtramp_page)
unlock_user(tramp, sigtramp_page, 6 * 4);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return pc == default_sigreturn + 3 * 4
+ || pc == default_rt_sigreturn + 3 * 4;
+}
diff --git a/linux-user/arm/signal.c b/linux-user/arm/signal.c
index 8db1c4b233..53430908cd 100644
--- a/linux-user/arm/signal.c
+++ b/linux-user/arm/signal.c
@@ -644,3 +644,8 @@ void setup_sigtramp(abi_ulong sigtramp_page)
unlock_user(tramp, sigtramp_page, total_size);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return (abi_ulong)(pc - default_sigreturn) <= 8 * RETCODE_BYTES;
+}
diff --git a/linux-user/hexagon/signal.c b/linux-user/hexagon/signal.c
index 492b51f155..751155006e 100644
--- a/linux-user/hexagon/signal.c
+++ b/linux-user/hexagon/signal.c
@@ -291,3 +291,8 @@ void setup_sigtramp(abi_ulong sigtramp_page)
unlock_user(tramp, sigtramp_page, 4 * 2);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return pc == default_rt_sigreturn + 4 * 2;
+}
diff --git a/linux-user/hppa/signal.c b/linux-user/hppa/signal.c
index f6f094c960..e17e3527b4 100644
--- a/linux-user/hppa/signal.c
+++ b/linux-user/hppa/signal.c
@@ -216,3 +216,8 @@ void setup_sigtramp(abi_ulong sigtramp_page)
default_rt_sigreturn = (sigtramp_page + 8) | 3;
unlock_user(tramp, sigtramp_page, 6*4);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return pc == (default_rt_sigreturn & ~3) + 4*4;
+}
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index 0f11dba831..a8302c0b1e 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -865,3 +865,13 @@ void setup_sigtramp(abi_ulong sigtramp_page)
unlock_user(tramp, sigtramp_page, 2 * 8);
}
#endif
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+#ifndef TARGET_X86_64
+ if (pc == default_sigreturn + 8 || pc == default_rt_sigreturn + 7) {
+ return true;
+ }
+#endif
+ return false;
+}
diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c
index 1a322f9697..fedf297ca8 100644
--- a/linux-user/loongarch64/signal.c
+++ b/linux-user/loongarch64/signal.c
@@ -452,3 +452,8 @@ void setup_sigtramp(abi_ulong sigtramp_page)
default_rt_sigreturn = sigtramp_page;
unlock_user(tramp, sigtramp_page, 8);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return pc == default_rt_sigreturn + 8;
+}
diff --git a/linux-user/m68k/signal.c b/linux-user/m68k/signal.c
index 77555781aa..c6af2baee9 100644
--- a/linux-user/m68k/signal.c
+++ b/linux-user/m68k/signal.c
@@ -408,3 +408,8 @@ void setup_sigtramp(abi_ulong sigtramp_page)
unlock_user(tramp, sigtramp_page, 4 + 6);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return pc == default_sigreturn + 4 || pc == default_rt_sigreturn + 6;
+}
diff --git a/linux-user/microblaze/signal.c b/linux-user/microblaze/signal.c
index f6d47d76ff..2027858025 100644
--- a/linux-user/microblaze/signal.c
+++ b/linux-user/microblaze/signal.c
@@ -230,3 +230,8 @@ void setup_sigtramp(abi_ulong sigtramp_page)
default_rt_sigreturn = sigtramp_page;
unlock_user(tramp, sigtramp_page, 8);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return pc == default_rt_sigreturn + 8;
+}
diff --git a/linux-user/mips/signal.c b/linux-user/mips/signal.c
index d69a5d73dd..73914e001f 100644
--- a/linux-user/mips/signal.c
+++ b/linux-user/mips/signal.c
@@ -393,3 +393,13 @@ void setup_sigtramp(abi_ulong sigtramp_page)
unlock_user(tramp, sigtramp_page, 2 * 8);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+#ifdef TARGET_ARCH_HAS_SETUP_FRAME
+ if (pc == default_sigreturn + 8) {
+ return true;
+ }
+#endif
+ return pc == default_rt_sigreturn + 8;
+}
diff --git a/linux-user/openrisc/signal.c b/linux-user/openrisc/signal.c
index cb74a9fe5e..c368069980 100644
--- a/linux-user/openrisc/signal.c
+++ b/linux-user/openrisc/signal.c
@@ -175,3 +175,8 @@ void setup_sigtramp(abi_ulong sigtramp_page)
default_rt_sigreturn = sigtramp_page;
unlock_user(tramp, sigtramp_page, 8);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return pc == default_rt_sigreturn + 8;
+}
diff --git a/linux-user/ppc/signal.c b/linux-user/ppc/signal.c
index 24e5a02a78..7516fccfeb 100644
--- a/linux-user/ppc/signal.c
+++ b/linux-user/ppc/signal.c
@@ -730,3 +730,13 @@ void setup_sigtramp(abi_ulong sigtramp_page)
unlock_user(tramp, sigtramp_page, 2 * 8);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+#ifdef TARGET_ARCH_HAS_SETUP_FRAME
+ if (pc == default_sigreturn + 8) {
+ return true;
+ }
+#endif
+ return pc == default_rt_sigreturn + 8;
+}
diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c
index 358fa1d82d..5ae889e8b9 100644
--- a/linux-user/riscv/signal.c
+++ b/linux-user/riscv/signal.c
@@ -213,3 +213,8 @@ void setup_sigtramp(abi_ulong sigtramp_page)
default_rt_sigreturn = sigtramp_page;
unlock_user(tramp, sigtramp_page, 8);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return pc == default_rt_sigreturn + 8;
+}
diff --git a/linux-user/s390x/signal.c b/linux-user/s390x/signal.c
index df49c24708..17a194104f 100644
--- a/linux-user/s390x/signal.c
+++ b/linux-user/s390x/signal.c
@@ -419,3 +419,8 @@ void setup_sigtramp(abi_ulong sigtramp_page)
unlock_user(tramp, sigtramp_page, 2 + 2);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return pc == default_sigreturn + 2 || pc == default_rt_sigreturn + 2;
+}
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
index 9ecc026fae..1c34dd27c1 100644
--- a/linux-user/sh4/signal.c
+++ b/linux-user/sh4/signal.c
@@ -346,3 +346,8 @@ void setup_sigtramp(abi_ulong sigtramp_page)
unlock_user(tramp, sigtramp_page, 2 * 6);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return pc == default_sigreturn + 6 || pc == default_rt_sigreturn + 6;
+}
diff --git a/linux-user/sparc/signal.c b/linux-user/sparc/signal.c
index 8181b8b92c..d7121d1871 100644
--- a/linux-user/sparc/signal.c
+++ b/linux-user/sparc/signal.c
@@ -789,3 +789,13 @@ do_sigsegv:
force_sig(TARGET_SIGSEGV);
}
#endif /* TARGET_SPARC64 */
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+#ifdef TARGET_ABI32
+ if (pc == default_sigreturn + 8 || pc == default_rt_sigreturn + 8) {
+ return true;
+ }
+#endif
+ return false;
+}
diff --git a/linux-user/xtensa/signal.c b/linux-user/xtensa/signal.c
index ef8b0c3a27..0ce3d826d7 100644
--- a/linux-user/xtensa/signal.c
+++ b/linux-user/xtensa/signal.c
@@ -303,3 +303,8 @@ void setup_sigtramp(abi_ulong sigtramp_page)
install_sigtramp(tramp);
unlock_user(tramp, sigtramp_page, 6);
}
+
+bool is_vdso_sigreturn(abi_ulong pc)
+{
+ return pc == default_rt_sigreturn + 6;
+}
diff --git a/linux-user/main.c b/linux-user/main.c
index a9142ee726..420f91c023 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -231,6 +231,8 @@ void init_task_state(TaskState *ts)
ts->start_boottime += bt.tv_nsec * (uint64_t) ticks_per_sec /
NANOSECONDS_PER_SECOND;
}
+
+ ts->sys_dispatch_len = (abi_ulong)~0ULL;
}
CPUArchState *cpu_copy(CPUArchState *env)
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 0b19fa43e6..d5e28a0a4e 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -161,6 +161,11 @@ struct TaskState {
/* This thread's sigaltstack, if it has one */
struct target_sigaltstack sigaltstack_used;
+ /* This thread's SYSCALL_USER_DISPATCH state, len=~0 means disabled */
+ vaddr sys_dispatch;
+ abi_ulong sys_dispatch_len;
+ vaddr sys_dispatch_selector;
+
/* Start time of task after system boot in clock ticks */
uint64_t start_boottime;
};
diff --git a/linux-user/signal-common.h b/linux-user/signal-common.h
index 196d2406f8..7b17ac3221 100644
--- a/linux-user/signal-common.h
+++ b/linux-user/signal-common.h
@@ -27,6 +27,7 @@ extern abi_ulong default_sigreturn;
extern abi_ulong default_rt_sigreturn;
void setup_sigtramp(abi_ulong tramp_page);
+bool is_vdso_sigreturn(abi_ulong pc);
int on_sig_stack(unsigned long sp);
int sas_ss_flags(unsigned long sp);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index fc37028597..c520ae4031 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6340,6 +6340,10 @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr)
#endif
#ifndef PR_SET_SYSCALL_USER_DISPATCH
# define PR_SET_SYSCALL_USER_DISPATCH 59
+# define PR_SYS_DISPATCH_OFF 0
+# define PR_SYS_DISPATCH_ON 1
+# define SYSCALL_DISPATCH_FILTER_ALLOW 0
+# define SYSCALL_DISPATCH_FILTER_BLOCK 1
#endif
#ifndef PR_SME_SET_VL
# define PR_SME_SET_VL 63
@@ -6394,6 +6398,37 @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
#define do_prctl_sme_set_vl do_prctl_inval1
#endif
+static abi_long do_prctl_syscall_user_dispatch(CPUArchState* env, abi_long arg2,
+ abi_ulong arg3, abi_ulong arg4,
+ abi_ulong arg5)
+{
+ CPUState *cpu = env_cpu(env);
+ TaskState *ts = get_task_state(cpu);
+ switch (arg2) {
+ case PR_SYS_DISPATCH_OFF:
+ if (arg3 || arg4 || arg5) {
+ return -TARGET_EINVAL;
+ }
+ ts->sys_dispatch_len = (abi_ulong)~0ULL;
+ return 0;
+ case PR_SYS_DISPATCH_ON:
+ {
+ if (arg3 && arg3 + arg4 <= arg3) {
+ return -TARGET_EINVAL;
+ }
+ if (arg5 && !access_ok(cpu, VERIFY_READ, arg5, 1)) {
+ return -TARGET_EFAULT;
+ }
+ ts->sys_dispatch = arg3;
+ ts->sys_dispatch_len = arg4;
+ ts->sys_dispatch_selector = arg5;
+ return 0;
+ }
+ default:
+ return -TARGET_EINVAL;
+ }
+}
+
static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
abi_long arg3, abi_long arg4, abi_long arg5)
{
@@ -6469,6 +6504,9 @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
case PR_SET_UNALIGN:
return do_prctl_set_unalign(env, arg2);
+ case PR_SET_SYSCALL_USER_DISPATCH:
+ return do_prctl_syscall_user_dispatch(env, arg2, arg3, arg4, arg5);
+
case PR_CAP_AMBIENT:
case PR_CAPBSET_READ:
case PR_CAPBSET_DROP:
@@ -6523,7 +6561,6 @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
case PR_SET_MM:
case PR_GET_SECCOMP:
case PR_SET_SECCOMP:
- case PR_SET_SYSCALL_USER_DISPATCH:
case PR_GET_THP_DISABLE:
case PR_SET_THP_DISABLE:
case PR_GET_TSC:
@@ -13881,12 +13918,39 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
return ret;
}
+static bool sys_dispatch(abi_ulong pc, TaskState *ts)
+{
+ if (likely(pc - ts->sys_dispatch < ts->sys_dispatch_len)) {
+ return false;
+ }
+ if (unlikely(is_vdso_sigreturn(pc))) {
+ return false;
+ }
+ if (likely(ts->sys_dispatch_selector)) {
+ uint8_t sb;
+ if (get_user_u8(sb, ts->sys_dispatch_selector)) {
+ force_sig(TARGET_SIGSEGV);
+ return true;
+ }
+ if (likely(sb == SYSCALL_DISPATCH_FILTER_ALLOW)) {
+ return false;
+ }
+ if (unlikely(sb != SYSCALL_DISPATCH_FILTER_BLOCK)) {
+ force_sig(TARGET_SIGSYS);
+ return true;
+ }
+ }
+ force_sig_fault(TARGET_SIGSYS, TARGET_SYS_USER_DISPATCH, pc);
+ return true;
+}
+
abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1,
abi_long arg2, abi_long arg3, abi_long arg4,
abi_long arg5, abi_long arg6, abi_long arg7,
abi_long arg8)
{
CPUState *cpu = env_cpu(cpu_env);
+ TaskState *ts = get_task_state(cpu);
abi_long ret;
#ifdef DEBUG_ERESTARTSYS
@@ -13903,6 +13967,14 @@ abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1,
}
#endif
+ /* Save an indirect call on syscall hot path */
+ if (unlikely(ts->sys_dispatch_len != (abi_ulong)~0ULL)) {
+ vaddr pc = cpu->cc->get_pc(cpu);
+ if (sys_dispatch(pc, ts)) {
+ return -QEMU_ESIGRETURN;
+ }
+ }
+
record_syscall_start(cpu, num, arg1,
arg2, arg3, arg4, arg5, arg6, arg7, arg8);
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 5d22759992..7812253455 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -693,6 +693,12 @@ typedef struct target_siginfo {
#define TARGET_TRAP_HWBKPT (4) /* hardware breakpoint/watchpoint */
#define TARGET_TRAP_UNK (5) /* undiagnosed trap */
+/*
+ * SIGSYS si_codes
+ */
+#define TARGET_SYS_SECCOMP (1) /* seccomp triggered */
+#define TARGET_SYS_USER_DISPATCH (2) /* syscall user dispatch triggered */
+
/*
* SIGEMT si_codes
*/
--
2.50.0
© 2016 - 2025 Red Hat, Inc.