The current api from safe_syscall_base() is to return -errno, which is
the interface provided by *some* linux kernel abis. The wrapper macro,
safe_syscall(), detects error, stores into errno, and returns -1, to
match the api of the system syscall().
For those kernel abis that do not return -errno natively, this leads
to double syscall error detection. E.g. Linux ppc64, which sets the
SO flag for error.
Simplify the usage from C by moving the error detection into assembly.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
linux-user/safe-syscall.h | 20 +++---
common-user/host/aarch64/safe-syscall.inc.S | 55 +++++++++-------
common-user/host/arm/safe-syscall.inc.S | 58 ++++++++++-------
common-user/host/i386/safe-syscall.inc.S | 51 +++++++++------
common-user/host/ppc64/safe-syscall.inc.S | 63 +++++++++++--------
common-user/host/riscv/safe-syscall.inc.S | 50 +++++++++------
common-user/host/s390x/safe-syscall.inc.S | 50 +++++++++------
common-user/host/x86_64/safe-syscall.inc.S | 70 ++++++++++++---------
8 files changed, 243 insertions(+), 174 deletions(-)
diff --git a/linux-user/safe-syscall.h b/linux-user/safe-syscall.h
index aaa9ffc0e2..ea0e8a8d24 100644
--- a/linux-user/safe-syscall.h
+++ b/linux-user/safe-syscall.h
@@ -125,23 +125,17 @@
* kinds of restartability.
*/
#ifdef HAVE_SAFE_SYSCALL
-/* The core part of this function is implemented in assembly */
-extern long safe_syscall_base(int *pending, long number, ...);
+
+/* The core part of this function is implemented in assembly. */
+extern long safe_syscall_base(int *pending, int *errnop, long number, ...);
+
/* These are defined by the safe-syscall.inc.S file */
extern char safe_syscall_start[];
extern char safe_syscall_end[];
-#define safe_syscall(...) \
- ({ \
- long ret_; \
- int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \
- ret_ = safe_syscall_base(psp_, __VA_ARGS__); \
- if (is_error(ret_)) { \
- errno = -ret_; \
- ret_ = -1; \
- } \
- ret_; \
- })
+#define safe_syscall(...) \
+ safe_syscall_base(&((TaskState *)thread_cpu->opaque)->signal_pending, \
+ &errno, __VA_ARGS__)
#else
diff --git a/common-user/host/aarch64/safe-syscall.inc.S b/common-user/host/aarch64/safe-syscall.inc.S
index bc1f5a9792..95c60d8609 100644
--- a/common-user/host/aarch64/safe-syscall.inc.S
+++ b/common-user/host/aarch64/safe-syscall.inc.S
@@ -17,22 +17,21 @@
.type safe_syscall_start, #function
.type safe_syscall_end, #function
- /* This is the entry point for making a system call. The calling
+ /*
+ * This is the entry point for making a system call. The calling
* convention here is that of a C varargs function with the
* first argument an 'int *' to the signal_pending flag, the
* second one the system call number (as a 'long'), and all further
* arguments being syscall arguments (also 'long').
- * We return a long which is the syscall's return value, which
- * may be negative-errno on failure. Conversion to the
- * -1-and-errno-set convention is done by the calling wrapper.
*/
safe_syscall_base:
.cfi_startproc
- /* The syscall calling convention isn't the same as the
- * C one:
+ /*
+ * The syscall calling convention isn't the same as the C one:
* we enter with x0 == *signal_pending
- * x1 == syscall number
- * x2 ... x7, (stack) == syscall arguments
+ * x1 == errno
+ * x2 == syscall number
+ * x3 ... x7, (stack) == syscall arguments
* and return the result in x0
* and the syscall instruction needs
* x8 == syscall number
@@ -40,17 +39,18 @@ safe_syscall_base:
* and returns the result in x0
* Shuffle everything around appropriately.
*/
- mov x9, x0 /* signal_pending pointer */
- mov x8, x1 /* syscall number */
- mov x0, x2 /* syscall arguments */
- mov x1, x3
- mov x2, x4
- mov x3, x5
- mov x4, x6
- mov x5, x7
- ldr x6, [sp]
+ mov x10, x0 /* signal_pending pointer */
+ mov x11, x1 /* errno pointer */
+ mov x8, x2 /* syscall number */
+ mov x0, x3 /* syscall arguments */
+ mov x1, x4
+ mov x2, x5
+ mov x3, x6
+ mov x4, x7
+ ldp x5, x6, [sp]
- /* This next sequence of code works in conjunction with the
+ /*
+ * This next sequence of code works in conjunction with the
* rewind_if_safe_syscall_function(). If a signal is taken
* and the interrupted PC is anywhere between 'safe_syscall_start'
* and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
@@ -59,17 +59,26 @@ safe_syscall_base:
*/
safe_syscall_start:
/* if signal_pending is non-zero, don't do the call */
- ldr w10, [x9]
- cbnz w10, 0f
+ ldr w9, [x10]
+ cbnz w9, 2f
svc 0x0
safe_syscall_end:
+
/* code path for having successfully executed the syscall */
+ cmn x0, #4095
+ b.cs 1f
ret
-0:
- /* code path when we didn't execute the syscall */
- mov x0, #-TARGET_ERESTARTSYS
+ /* code path setting errno */
+0: neg w0, w0 /* create positive errno */
+1: str w0, [x11] /* store errno */
+ mov x0, #-1
ret
+
+ /* code path when we didn't execute the syscall */
+2: mov w0, #TARGET_ERESTARTSYS
+ b 1b
+
.cfi_endproc
.size safe_syscall_base, .-safe_syscall_base
diff --git a/common-user/host/arm/safe-syscall.inc.S b/common-user/host/arm/safe-syscall.inc.S
index 88c4958504..17839c6486 100644
--- a/common-user/host/arm/safe-syscall.inc.S
+++ b/common-user/host/arm/safe-syscall.inc.S
@@ -22,33 +22,35 @@
.arm
.align 2
- /* This is the entry point for making a system call. The calling
+ /*
+ * This is the entry point for making a system call. The calling
* convention here is that of a C varargs function with the
* first argument an 'int *' to the signal_pending flag, the
* second one the system call number (as a 'long'), and all further
* arguments being syscall arguments (also 'long').
- * We return a long which is the syscall's return value, which
- * may be negative-errno on failure. Conversion to the
- * -1-and-errno-set convention is done by the calling wrapper.
*/
safe_syscall_base:
.fnstart
.cfi_startproc
mov r12, sp /* save entry stack */
- push { r4, r5, r6, r7, r8, lr }
- .save { r4, r5, r6, r7, r8, lr }
- .cfi_adjust_cfa_offset 24
+ push { r4, r5, r6, r7, r8, r9, r10, lr }
+ .save { r4, r5, r6, r7, r8, r9, r10, lr }
+ .cfi_adjust_cfa_offset 32
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
.cfi_rel_offset r6, 8
.cfi_rel_offset r7, 12
.cfi_rel_offset r8, 16
- .cfi_rel_offset lr, 20
+ .cfi_rel_offset r9, 20
+ .cfi_rel_offset r10, 24
+ .cfi_rel_offset lr, 28
- /* The syscall calling convention isn't the same as the C one:
- * we enter with r0 == *signal_pending
- * r1 == syscall number
- * r2, r3, [sp+0] ... [sp+12] == syscall arguments
+ /*
+ * The syscall calling convention isn't the same as the C one:
+ * we enter with r0 == &signal_pending
+ * r1 == &errno
+ * r2 == syscall number
+ * r3, [sp+0] ... [sp+16] == syscall arguments
* and return the result in r0
* and the syscall instruction needs
* r7 == syscall number
@@ -58,12 +60,13 @@ safe_syscall_base:
* Note the 16 bytes that we pushed to save registers.
*/
mov r8, r0 /* copy signal_pending */
- mov r7, r1 /* syscall number */
- mov r0, r2 /* syscall args */
- mov r1, r3
- ldm r12, { r2, r3, r4, r5, r6 }
+ mov r9, r1 /* copy errnop */
+ mov r7, r2 /* syscall number */
+ mov r0, r3 /* syscall args */
+ ldm r12, { r1, r2, r3, r4, r5, r6 }
- /* This next sequence of code works in conjunction with the
+ /*
+ * This next sequence of code works in conjunction with the
* rewind_if_safe_syscall_function(). If a signal is taken
* and the interrupted PC is anywhere between 'safe_syscall_start'
* and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
@@ -74,16 +77,25 @@ safe_syscall_start:
/* if signal_pending is non-zero, don't do the call */
ldr r12, [r8] /* signal_pending */
tst r12, r12
- bne 1f
+ bne 2f
swi 0
safe_syscall_end:
- /* code path for having successfully executed the syscall */
- pop { r4, r5, r6, r7, r8, pc }
-1:
+ /* code path for having successfully executed the syscall */
+ cmp r0, #-4096
+ bhi 0f
+9: pop { r4, r5, r6, r7, r8, r9, r10, pc }
+
+ /* code path setting errno */
+0: neg r0, r0 /* create positive errno */
+1: str r0, [r9] /* store errno */
+ mov r0, #-1
+ b 9b
+
/* code path when we didn't execute the syscall */
- ldr r0, =-TARGET_ERESTARTSYS
- pop { r4, r5, r6, r7, r8, pc }
+2: ldr r0, =TARGET_ERESTARTSYS
+ b 1b
+
.fnend
.cfi_endproc
diff --git a/common-user/host/i386/safe-syscall.inc.S b/common-user/host/i386/safe-syscall.inc.S
index 9e58fc6504..ad89521783 100644
--- a/common-user/host/i386/safe-syscall.inc.S
+++ b/common-user/host/i386/safe-syscall.inc.S
@@ -15,14 +15,12 @@
.global safe_syscall_end
.type safe_syscall_base, @function
- /* This is the entry point for making a system call. The calling
+ /*
+ * This is the entry point for making a system call. The calling
* convention here is that of a C varargs function with the
* first argument an 'int *' to the signal_pending flag, the
* second one the system call number (as a 'long'), and all further
* arguments being syscall arguments (also 'long').
- * We return a long which is the syscall's return value, which
- * may be negative-errno on failure. Conversion to the
- * -1-and-errno-set convention is done by the calling wrapper.
*/
safe_syscall_base:
.cfi_startproc
@@ -41,9 +39,10 @@ safe_syscall_base:
/* The syscall calling convention isn't the same as the C one:
* we enter with 0(%esp) == return address
- * 4(%esp) == *signal_pending
- * 8(%esp) == syscall number
- * 12(%esp) ... 32(%esp) == syscall arguments
+ * 4(%esp) == &signal_pending
+ * 8(%esp) == &errno
+ * 12(%esp) == syscall number
+ * 16(%esp) ... 36(%esp) == syscall arguments
* and return the result in eax
* and the syscall instruction needs
* eax == syscall number
@@ -52,14 +51,15 @@ safe_syscall_base:
* Shuffle everything around appropriately.
* Note the 16 bytes that we pushed to save registers.
*/
- mov 12+16(%esp), %ebx /* the syscall arguments */
- mov 16+16(%esp), %ecx
- mov 20+16(%esp), %edx
- mov 24+16(%esp), %esi
- mov 28+16(%esp), %edi
- mov 32+16(%esp), %ebp
+ mov 16+16(%esp), %ebx /* the syscall arguments */
+ mov 20+16(%esp), %ecx
+ mov 24+16(%esp), %edx
+ mov 28+16(%esp), %esi
+ mov 32+16(%esp), %edi
+ mov 36+16(%esp), %ebp
- /* This next sequence of code works in conjunction with the
+ /*
+ * This next sequence of code works in conjunction with the
* rewind_if_safe_syscall_function(). If a signal is taken
* and the interrupted PC is anywhere between 'safe_syscall_start'
* and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
@@ -70,12 +70,16 @@ safe_syscall_start:
/* if signal_pending is non-zero, don't do the call */
mov 4+16(%esp), %eax /* signal_pending */
cmpl $0, (%eax)
- jnz 1f
+ jnz 2f
mov 8+16(%esp), %eax /* syscall number */
int $0x80
safe_syscall_end:
+
/* code path for having successfully executed the syscall */
- pop %ebx
+ cmp $-4095, %eax
+ jae 0f
+
+9: pop %ebx
.cfi_remember_state
.cfi_adjust_cfa_offset -4
.cfi_restore ebx
@@ -90,11 +94,18 @@ safe_syscall_end:
.cfi_restore ebp
ret
-1:
- /* code path when we didn't execute the syscall */
.cfi_restore_state
- mov $-TARGET_ERESTARTSYS, %eax
- jmp safe_syscall_end
+
+ /* code path setting errno */
+0: neg %eax /* create positive errno */
+1: mov 8+16(%esp), %ebx /* load errno pointer */
+ mov %eax, (%ebx) /* store errno */
+ mov $-1, %eax
+ jmp 9b
+
+ /* code path when we didn't execute the syscall */
+2: mov $TARGET_ERESTARTSYS, %eax
+ jmp 1b
.cfi_endproc
.size safe_syscall_base, .-safe_syscall_base
diff --git a/common-user/host/ppc64/safe-syscall.inc.S b/common-user/host/ppc64/safe-syscall.inc.S
index 875133173b..e35408c5fb 100644
--- a/common-user/host/ppc64/safe-syscall.inc.S
+++ b/common-user/host/ppc64/safe-syscall.inc.S
@@ -17,14 +17,19 @@
.text
- /* This is the entry point for making a system call. The calling
+#if _CALL_ELF == 2
+#define PARAM_OFS 32
+#else
+#define PARAM_OFS 48
+#endif
+#define PARAM(X) PARAM_OFS + X*8
+
+ /*
+ * This is the entry point for making a system call. The calling
* convention here is that of a C varargs function with the
* first argument an 'int *' to the signal_pending flag, the
* second one the system call number (as a 'long'), and all further
* arguments being syscall arguments (also 'long').
- * We return a long which is the syscall's return value, which
- * may be negative-errno on failure. Conversion to the
- * -1-and-errno-set convention is done by the calling wrapper.
*/
#if _CALL_ELF == 2
safe_syscall_base:
@@ -39,9 +44,11 @@ safe_syscall_base:
.L.safe_syscall_base:
.cfi_startproc
#endif
- /* We enter with r3 == *signal_pending
- * r4 == syscall number
- * r5 ... r10 == syscall arguments
+ /*
+ * We enter with r3 == &signal_pending
+ * r4 == &errno
+ * r5 == syscall number
+ * r6 ... r10, (stack) == syscall arguments
* and return the result in r3
* and the syscall instruction needs
* r0 == syscall number
@@ -49,18 +56,18 @@ safe_syscall_base:
* and returns the result in r3
* Shuffle everything around appropriately.
*/
- std 14, 16(1) /* Preserve r14 in SP+16 */
- .cfi_offset 14, 16
- mr 14, 3 /* signal_pending */
- mr 0, 4 /* syscall number */
- mr 3, 5 /* syscall arguments */
- mr 4, 6
- mr 5, 7
- mr 6, 8
- mr 7, 9
- mr 8, 10
+ mr 11, 3 /* signal_pending pointer */
+ std 4, PARAM(1)(1) /* save errno pointer in param slot */
+ mr 0, 5 /* syscall number */
+ mr 3, 6 /* syscall arguments */
+ mr 4, 7
+ mr 5, 8
+ mr 6, 9
+ mr 7, 10
+ ld 8, PARAM(8)(1)
- /* This next sequence of code works in conjunction with the
+ /*
+ * This next sequence of code works in conjunction with the
* rewind_if_safe_syscall_function(). If a signal is taken
* and the interrupted PC is anywhere between 'safe_syscall_start'
* and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
@@ -69,23 +76,25 @@ safe_syscall_base:
*/
safe_syscall_start:
/* if signal_pending is non-zero, don't do the call */
- lwz 12, 0(14)
+ lwz 12, 0(11)
cmpwi 0, 12, 0
bne- 0f
sc
safe_syscall_end:
- /* code path when we did execute the syscall */
- ld 14, 16(1) /* restore r14 to its original value */
- bnslr+
- /* syscall failed; return negative errno */
- neg 3, 3
+ /* code path for having successfully executed the syscall */
+ bnslr+ /* SO set for syscall error */
+
+ /* code path setting errno */
+1: ld 11, PARAM(1)(1) /* restore errno pointer */
+ stw 3, 0(11) /* store errno */
+ li 3, -1
blr
/* code path when we didn't execute the syscall */
-0: addi 3, 0, -TARGET_ERESTARTSYS
- ld 14, 16(1) /* restore r14 to its original value */
- blr
+0: li 3, TARGET_ERESTARTSYS
+ b 1b
+
.cfi_endproc
#if _CALL_ELF == 2
diff --git a/common-user/host/riscv/safe-syscall.inc.S b/common-user/host/riscv/safe-syscall.inc.S
index 9ca3fbfd1e..eddede702b 100644
--- a/common-user/host/riscv/safe-syscall.inc.S
+++ b/common-user/host/riscv/safe-syscall.inc.S
@@ -23,17 +23,15 @@
* first argument an 'int *' to the signal_pending flag, the
* second one the system call number (as a 'long'), and all further
* arguments being syscall arguments (also 'long').
- * We return a long which is the syscall's return value, which
- * may be negative-errno on failure. Conversion to the
- * -1-and-errno-set convention is done by the calling wrapper.
*/
safe_syscall_base:
.cfi_startproc
/*
* The syscall calling convention is nearly the same as C:
- * we enter with a0 == *signal_pending
- * a1 == syscall number
- * a2 ... a7 == syscall arguments
+ * we enter with a0 == &signal_pending
+ * a1 == &errno
+ * a2 == syscall number
+ * a3 ... a7, [sp] == syscall arguments
* and return the result in a0
* and the syscall instruction needs
* a7 == syscall number
@@ -42,14 +40,19 @@ safe_syscall_base:
* Shuffle everything around appropriately.
*/
mv t0, a0 /* signal_pending pointer */
- mv t1, a1 /* syscall number */
- mv a0, a2 /* syscall arguments */
- mv a1, a3
- mv a2, a4
- mv a3, a5
- mv a4, a6
- mv a5, a7
- mv a7, t1
+ mv t1, a1 /* errno pointer */
+ mv t2, a2 /* syscall number */
+ mv a0, a3 /* syscall arguments */
+ mv a1, a4
+ mv a2, a5
+ mv a3, a6
+ mv a4, a7
+#if __riscv_xlen == 32
+ lw a5, 0(sp)
+#else
+ ld a5, 0(sp)
+#endif
+ mv a7, t2
/*
* This next sequence of code works in conjunction with the
@@ -61,17 +64,26 @@ safe_syscall_base:
*/
safe_syscall_start:
/* If signal_pending is non-zero, don't do the call */
- lw t1, 0(t0)
- bnez t1, 0f
+ lw t2, 0(t0)
+ bnez t2, 2f
scall
safe_syscall_end:
+
/* code path for having successfully executed the syscall */
+ li t2, -4096
+ bgtu a0, t2, 0f
ret
-0:
- /* code path when we didn't execute the syscall */
- li a0, -TARGET_ERESTARTSYS
+ /* code path setting errno */
+0: neg a0, a0 /* create positive errno */
+1: sw a0, 0(t1) /* store errno */
+ li a0, -1
ret
+
+ /* code path when we didn't execute the syscall */
+2: li a0, TARGET_ERESTARTSYS
+ j 1b
+
.cfi_endproc
.size safe_syscall_base, .-safe_syscall_base
diff --git a/common-user/host/s390x/safe-syscall.inc.S b/common-user/host/s390x/safe-syscall.inc.S
index 414b44ad38..f2a3bccc13 100644
--- a/common-user/host/s390x/safe-syscall.inc.S
+++ b/common-user/host/s390x/safe-syscall.inc.S
@@ -15,14 +15,12 @@
.global safe_syscall_end
.type safe_syscall_base, @function
- /* This is the entry point for making a system call. The calling
+ /*
+ * This is the entry point for making a system call. The calling
* convention here is that of a C varargs function with the
* first argument an 'int *' to the signal_pending flag, the
* second one the system call number (as a 'long'), and all further
* arguments being syscall arguments (also 'long').
- * We return a long which is the syscall's return value, which
- * may be negative-errno on failure. Conversion to the
- * -1-and-errno-set convention is done by the calling wrapper.
*/
safe_syscall_base:
.cfi_startproc
@@ -44,11 +42,13 @@ safe_syscall_base:
stg %r1,0(%r15) /* store back chain */
stg %r0,8(%r15) /* store eos */
- /* The syscall calling convention isn't the same as the
+ /*
+ * The syscall calling convention isn't the same as the
* C one:
- * we enter with r2 == *signal_pending
- * r3 == syscall number
- * r4, r5, r6, (stack) == syscall arguments
+ * we enter with r2 == &signal_pending
+ * r3 == &errno
+ * r4 == syscall number
+ * r5, r6, (stack) == syscall arguments
* and return the result in r2
* and the syscall instruction needs
* r1 == syscall number
@@ -57,13 +57,14 @@ safe_syscall_base:
* Shuffle everything around appropriately.
*/
lgr %r8,%r2 /* signal_pending pointer */
- lgr %r1,%r3 /* syscall number */
- lgr %r2,%r4 /* syscall args */
- lgr %r3,%r5
- lgr %r4,%r6
- lmg %r5,%r7,320(%r15)
+ lgr %r9,%r3 /* errno pointer */
+ lgr %r1,%r4 /* syscall number */
+ lgr %r2,%r5 /* syscall args */
+ lgr %r3,%r6
+ lmg %r4,%r7,320(%r15)
- /* This next sequence of code works in conjunction with the
+ /*
+ * This next sequence of code works in conjunction with the
* rewind_if_safe_syscall_function(). If a signal is taken
* and the interrupted PC is anywhere between 'safe_syscall_start'
* and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
@@ -73,18 +74,31 @@ safe_syscall_base:
safe_syscall_start:
/* if signal_pending is non-zero, don't do the call */
icm %r0,15,0(%r8)
- jne 2f
+ jne 1f
svc 0
safe_syscall_end:
-1: lg %r15,0(%r15) /* load back chain */
+ /* code path for having successfully executed the syscall */
+ lghi %r0, -4095 /* check for syscall error */
+ clgr %r2, %r0
+ jgnl 0f
+
+9: lg %r15,0(%r15) /* load back chain */
.cfi_remember_state
.cfi_adjust_cfa_offset -160
lmg %r6,%r15,48(%r15) /* load saved registers */
br %r14
.cfi_restore_state
-2: lghi %r2, -TARGET_ERESTARTSYS
- j 1b
+
+ /* code path when we didn't execute the syscall */
+1: lghi %r2, -TARGET_ERESTARTSYS
+
+ /* code path setting errno */
+0: lcr %r2, %r2 /* create positive errno */
+ st %r2, 0(%r9) /* store errno */
+ lghi %r2, -1
+ j 9b
+
.cfi_endproc
.size safe_syscall_base, .-safe_syscall_base
diff --git a/common-user/host/x86_64/safe-syscall.inc.S b/common-user/host/x86_64/safe-syscall.inc.S
index f36992daa3..9a0c4c93b4 100644
--- a/common-user/host/x86_64/safe-syscall.inc.S
+++ b/common-user/host/x86_64/safe-syscall.inc.S
@@ -14,18 +14,17 @@
.global safe_syscall_end
.type safe_syscall_base, @function
- /* This is the entry point for making a system call. The calling
+ /*
+ * This is the entry point for making a system call. The calling
* convention here is that of a C varargs function with the
* first argument an 'int *' to the signal_pending flag, the
* second one the system call number (as a 'long'), and all further
* arguments being syscall arguments (also 'long').
- * We return a long which is the syscall's return value, which
- * may be negative-errno on failure. Conversion to the
- * -1-and-errno-set convention is done by the calling wrapper.
*/
safe_syscall_base:
.cfi_startproc
- /* This saves a frame pointer and aligns the stack for the syscall.
+ /*
+ * This saves a frame pointer and aligns the stack for the syscall.
* (It's unclear if the syscall ABI has the same stack alignment
* requirements as the userspace function call ABI, but better safe than
* sorry. Appendix A2 of http://www.x86-64.org/documentation/abi.pdf
@@ -35,11 +34,12 @@ safe_syscall_base:
.cfi_adjust_cfa_offset 8
.cfi_rel_offset rbp, 0
- /* The syscall calling convention isn't the same as the
- * C one:
- * we enter with rdi == *signal_pending
- * rsi == syscall number
- * rdx, rcx, r8, r9, (stack), (stack) == syscall arguments
+ /*
+ * The syscall calling convention isn't the same as the C one:
+ * we enter with rdi == &signal_pending
+ * rsi == &errno
+ * rdx == syscall number
+ * rcx, r8, r9, (stack...) == syscall arguments
* and return the result in rax
* and the syscall instruction needs
* rax == syscall number
@@ -48,17 +48,19 @@ safe_syscall_base:
* Shuffle everything around appropriately.
* Note that syscall will trash rcx and r11.
*/
- mov %rsi, %rax /* syscall number */
- mov %rdi, %rbp /* signal_pending pointer */
+ mov %rdi, %r11 /* signal_pending pointer */
+ mov %rsi, %rbp /* errno pointer */
+ mov %rdx, %rax /* syscall number */
/* and the syscall arguments */
- mov %rdx, %rdi
- mov %rcx, %rsi
- mov %r8, %rdx
- mov %r9, %r10
- mov 16(%rsp), %r8
- mov 24(%rsp), %r9
+ mov %rcx, %rdi
+ mov %r8, %rsi
+ mov %r9, %rdx
+ mov 16(%rsp), %r10
+ mov 24(%rsp), %r8
+ mov 32(%rsp), %r9
- /* This next sequence of code works in conjunction with the
+ /*
+ * This next sequence of code works in conjunction with the
* rewind_if_safe_syscall_function(). If a signal is taken
* and the interrupted PC is anywhere between 'safe_syscall_start'
* and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
@@ -67,25 +69,31 @@ safe_syscall_base:
*/
safe_syscall_start:
/* if signal_pending is non-zero, don't do the call */
- cmpl $0, (%rbp)
- jnz 1f
+ cmpl $0, (%r11)
+ jnz 2f
syscall
safe_syscall_end:
+
/* code path for having successfully executed the syscall */
- pop %rbp
+ cmp $-4095, %rax
+ jae 0f
+
+9: pop %rbp
.cfi_remember_state
.cfi_def_cfa_offset 8
.cfi_restore rbp
ret
-
-1:
- /* code path when we didn't execute the syscall */
.cfi_restore_state
- mov $-TARGET_ERESTARTSYS, %rax
- pop %rbp
- .cfi_def_cfa_offset 8
- .cfi_restore rbp
- ret
- .cfi_endproc
+ /* code path setting errno */
+0: neg %eax /* create positive errno */
+1: mov %eax, (%rbp) /* store errno */
+ mov $-1, %rax
+ jmp 9b
+
+ /* code path when we didn't execute the syscall */
+2: mov $TARGET_ERESTARTSYS, %eax
+ jmp 1b
+
+ .cfi_endproc
.size safe_syscall_base, .-safe_syscall_base
--
2.25.1
On Wed, Nov 17, 2021 at 9:04 AM Richard Henderson <
richard.henderson@linaro.org> wrote:
> The current api from safe_syscall_base() is to return -errno, which is
> the interface provided by *some* linux kernel abis. The wrapper macro,
> safe_syscall(), detects error, stores into errno, and returns -1, to
> match the api of the system syscall().
>
> For those kernel abis that do not return -errno natively, this leads
> to double syscall error detection. E.g. Linux ppc64, which sets the
> SO flag for error.
>
> Simplify the usage from C by moving the error detection into assembly.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> linux-user/safe-syscall.h | 20 +++---
> common-user/host/aarch64/safe-syscall.inc.S | 55 +++++++++-------
> common-user/host/arm/safe-syscall.inc.S | 58 ++++++++++-------
> common-user/host/i386/safe-syscall.inc.S | 51 +++++++++------
> common-user/host/ppc64/safe-syscall.inc.S | 63 +++++++++++--------
> common-user/host/riscv/safe-syscall.inc.S | 50 +++++++++------
> common-user/host/s390x/safe-syscall.inc.S | 50 +++++++++------
> common-user/host/x86_64/safe-syscall.inc.S | 70 ++++++++++++---------
> 8 files changed, 243 insertions(+), 174 deletions(-)
>
Reviewed by: Warner Losh <imp@bsdimp.com>
And I think it may fix a bug, when integrated with bsd-user fork,
in PowerPC hosts (though there's other bugs lingering there).
> diff --git a/linux-user/safe-syscall.h b/linux-user/safe-syscall.h
> index aaa9ffc0e2..ea0e8a8d24 100644
> --- a/linux-user/safe-syscall.h
> +++ b/linux-user/safe-syscall.h
> @@ -125,23 +125,17 @@
> * kinds of restartability.
> */
> #ifdef HAVE_SAFE_SYSCALL
> -/* The core part of this function is implemented in assembly */
> -extern long safe_syscall_base(int *pending, long number, ...);
> +
> +/* The core part of this function is implemented in assembly. */
> +extern long safe_syscall_base(int *pending, int *errnop, long number,
> ...);
> +
> /* These are defined by the safe-syscall.inc.S file */
> extern char safe_syscall_start[];
> extern char safe_syscall_end[];
>
> -#define safe_syscall(...) \
> - ({ \
> - long ret_; \
> - int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \
> - ret_ = safe_syscall_base(psp_, __VA_ARGS__); \
> - if (is_error(ret_)) { \
> - errno = -ret_; \
> - ret_ = -1; \
> - } \
> - ret_; \
> - })
> +#define safe_syscall(...)
> \
> + safe_syscall_base(&((TaskState *)thread_cpu->opaque)->signal_pending,
> \
> + &errno, __VA_ARGS__)
>
> #else
>
> diff --git a/common-user/host/aarch64/safe-syscall.inc.S
> b/common-user/host/aarch64/safe-syscall.inc.S
> index bc1f5a9792..95c60d8609 100644
> --- a/common-user/host/aarch64/safe-syscall.inc.S
> +++ b/common-user/host/aarch64/safe-syscall.inc.S
> @@ -17,22 +17,21 @@
> .type safe_syscall_start, #function
> .type safe_syscall_end, #function
>
> - /* This is the entry point for making a system call. The calling
> + /*
> + * This is the entry point for making a system call. The calling
> * convention here is that of a C varargs function with the
> * first argument an 'int *' to the signal_pending flag, the
> * second one the system call number (as a 'long'), and all further
> * arguments being syscall arguments (also 'long').
> - * We return a long which is the syscall's return value, which
> - * may be negative-errno on failure. Conversion to the
> - * -1-and-errno-set convention is done by the calling wrapper.
> */
> safe_syscall_base:
> .cfi_startproc
> - /* The syscall calling convention isn't the same as the
> - * C one:
> + /*
> + * The syscall calling convention isn't the same as the C one:
> * we enter with x0 == *signal_pending
> - * x1 == syscall number
> - * x2 ... x7, (stack) == syscall arguments
> + * x1 == errno
> + * x2 == syscall number
> + * x3 ... x7, (stack) == syscall arguments
> * and return the result in x0
> * and the syscall instruction needs
> * x8 == syscall number
> @@ -40,17 +39,18 @@ safe_syscall_base:
> * and returns the result in x0
> * Shuffle everything around appropriately.
> */
> - mov x9, x0 /* signal_pending pointer */
> - mov x8, x1 /* syscall number */
> - mov x0, x2 /* syscall arguments */
> - mov x1, x3
> - mov x2, x4
> - mov x3, x5
> - mov x4, x6
> - mov x5, x7
> - ldr x6, [sp]
> + mov x10, x0 /* signal_pending pointer */
> + mov x11, x1 /* errno pointer */
> + mov x8, x2 /* syscall number */
> + mov x0, x3 /* syscall arguments */
> + mov x1, x4
> + mov x2, x5
> + mov x3, x6
> + mov x4, x7
> + ldp x5, x6, [sp]
>
> - /* This next sequence of code works in conjunction with the
> + /*
> + * This next sequence of code works in conjunction with the
> * rewind_if_safe_syscall_function(). If a signal is taken
> * and the interrupted PC is anywhere between 'safe_syscall_start'
> * and 'safe_syscall_end' then we rewind it to
> 'safe_syscall_start'.
> @@ -59,17 +59,26 @@ safe_syscall_base:
> */
> safe_syscall_start:
> /* if signal_pending is non-zero, don't do the call */
> - ldr w10, [x9]
> - cbnz w10, 0f
> + ldr w9, [x10]
> + cbnz w9, 2f
> svc 0x0
> safe_syscall_end:
> +
> /* code path for having successfully executed the syscall */
> + cmn x0, #4095
> + b.cs 1f
> ret
>
> -0:
> - /* code path when we didn't execute the syscall */
> - mov x0, #-TARGET_ERESTARTSYS
> + /* code path setting errno */
> +0: neg w0, w0 /* create positive errno */
> +1: str w0, [x11] /* store errno */
> + mov x0, #-1
> ret
> +
> + /* code path when we didn't execute the syscall */
> +2: mov w0, #TARGET_ERESTARTSYS
> + b 1b
> +
> .cfi_endproc
>
> .size safe_syscall_base, .-safe_syscall_base
> diff --git a/common-user/host/arm/safe-syscall.inc.S
> b/common-user/host/arm/safe-syscall.inc.S
> index 88c4958504..17839c6486 100644
> --- a/common-user/host/arm/safe-syscall.inc.S
> +++ b/common-user/host/arm/safe-syscall.inc.S
> @@ -22,33 +22,35 @@
> .arm
> .align 2
>
> - /* This is the entry point for making a system call. The calling
> + /*
> + * This is the entry point for making a system call. The calling
> * convention here is that of a C varargs function with the
> * first argument an 'int *' to the signal_pending flag, the
> * second one the system call number (as a 'long'), and all further
> * arguments being syscall arguments (also 'long').
> - * We return a long which is the syscall's return value, which
> - * may be negative-errno on failure. Conversion to the
> - * -1-and-errno-set convention is done by the calling wrapper.
> */
> safe_syscall_base:
> .fnstart
> .cfi_startproc
> mov r12, sp /* save entry stack */
> - push { r4, r5, r6, r7, r8, lr }
> - .save { r4, r5, r6, r7, r8, lr }
> - .cfi_adjust_cfa_offset 24
> + push { r4, r5, r6, r7, r8, r9, r10, lr }
> + .save { r4, r5, r6, r7, r8, r9, r10, lr }
> + .cfi_adjust_cfa_offset 32
> .cfi_rel_offset r4, 0
> .cfi_rel_offset r5, 4
> .cfi_rel_offset r6, 8
> .cfi_rel_offset r7, 12
> .cfi_rel_offset r8, 16
> - .cfi_rel_offset lr, 20
> + .cfi_rel_offset r9, 20
> + .cfi_rel_offset r10, 24
> + .cfi_rel_offset lr, 28
>
> - /* The syscall calling convention isn't the same as the C one:
> - * we enter with r0 == *signal_pending
> - * r1 == syscall number
> - * r2, r3, [sp+0] ... [sp+12] == syscall arguments
> + /*
> + * The syscall calling convention isn't the same as the C one:
> + * we enter with r0 == &signal_pending
> + * r1 == &errno
> + * r2 == syscall number
> + * r3, [sp+0] ... [sp+16] == syscall arguments
> * and return the result in r0
> * and the syscall instruction needs
> * r7 == syscall number
> @@ -58,12 +60,13 @@ safe_syscall_base:
> * Note the 16 bytes that we pushed to save registers.
> */
> mov r8, r0 /* copy signal_pending */
> - mov r7, r1 /* syscall number */
> - mov r0, r2 /* syscall args */
> - mov r1, r3
> - ldm r12, { r2, r3, r4, r5, r6 }
> + mov r9, r1 /* copy errnop */
> + mov r7, r2 /* syscall number */
> + mov r0, r3 /* syscall args */
> + ldm r12, { r1, r2, r3, r4, r5, r6 }
>
> - /* This next sequence of code works in conjunction with the
> + /*
> + * This next sequence of code works in conjunction with the
> * rewind_if_safe_syscall_function(). If a signal is taken
> * and the interrupted PC is anywhere between 'safe_syscall_start'
> * and 'safe_syscall_end' then we rewind it to
> 'safe_syscall_start'.
> @@ -74,16 +77,25 @@ safe_syscall_start:
> /* if signal_pending is non-zero, don't do the call */
> ldr r12, [r8] /* signal_pending */
> tst r12, r12
> - bne 1f
> + bne 2f
> swi 0
> safe_syscall_end:
> - /* code path for having successfully executed the syscall */
> - pop { r4, r5, r6, r7, r8, pc }
>
> -1:
> + /* code path for having successfully executed the syscall */
> + cmp r0, #-4096
> + bhi 0f
> +9: pop { r4, r5, r6, r7, r8, r9, r10, pc }
> +
> + /* code path setting errno */
> +0: neg r0, r0 /* create positive errno */
> +1: str r0, [r9] /* store errno */
> + mov r0, #-1
> + b 9b
> +
> /* code path when we didn't execute the syscall */
> - ldr r0, =-TARGET_ERESTARTSYS
> - pop { r4, r5, r6, r7, r8, pc }
> +2: ldr r0, =TARGET_ERESTARTSYS
> + b 1b
> +
> .fnend
> .cfi_endproc
>
> diff --git a/common-user/host/i386/safe-syscall.inc.S
> b/common-user/host/i386/safe-syscall.inc.S
> index 9e58fc6504..ad89521783 100644
> --- a/common-user/host/i386/safe-syscall.inc.S
> +++ b/common-user/host/i386/safe-syscall.inc.S
> @@ -15,14 +15,12 @@
> .global safe_syscall_end
> .type safe_syscall_base, @function
>
> - /* This is the entry point for making a system call. The calling
> + /*
> + * This is the entry point for making a system call. The calling
> * convention here is that of a C varargs function with the
> * first argument an 'int *' to the signal_pending flag, the
> * second one the system call number (as a 'long'), and all further
> * arguments being syscall arguments (also 'long').
> - * We return a long which is the syscall's return value, which
> - * may be negative-errno on failure. Conversion to the
> - * -1-and-errno-set convention is done by the calling wrapper.
> */
> safe_syscall_base:
> .cfi_startproc
> @@ -41,9 +39,10 @@ safe_syscall_base:
>
> /* The syscall calling convention isn't the same as the C one:
> * we enter with 0(%esp) == return address
> - * 4(%esp) == *signal_pending
> - * 8(%esp) == syscall number
> - * 12(%esp) ... 32(%esp) == syscall arguments
> + * 4(%esp) == &signal_pending
> + * 8(%esp) == &errno
> + * 12(%esp) == syscall number
> + * 16(%esp) ... 36(%esp) == syscall arguments
> * and return the result in eax
> * and the syscall instruction needs
> * eax == syscall number
> @@ -52,14 +51,15 @@ safe_syscall_base:
> * Shuffle everything around appropriately.
> * Note the 16 bytes that we pushed to save registers.
> */
> - mov 12+16(%esp), %ebx /* the syscall arguments */
> - mov 16+16(%esp), %ecx
> - mov 20+16(%esp), %edx
> - mov 24+16(%esp), %esi
> - mov 28+16(%esp), %edi
> - mov 32+16(%esp), %ebp
> + mov 16+16(%esp), %ebx /* the syscall arguments */
> + mov 20+16(%esp), %ecx
> + mov 24+16(%esp), %edx
> + mov 28+16(%esp), %esi
> + mov 32+16(%esp), %edi
> + mov 36+16(%esp), %ebp
>
> - /* This next sequence of code works in conjunction with the
> + /*
> + * This next sequence of code works in conjunction with the
> * rewind_if_safe_syscall_function(). If a signal is taken
> * and the interrupted PC is anywhere between 'safe_syscall_start'
> * and 'safe_syscall_end' then we rewind it to
> 'safe_syscall_start'.
> @@ -70,12 +70,16 @@ safe_syscall_start:
> /* if signal_pending is non-zero, don't do the call */
> mov 4+16(%esp), %eax /* signal_pending */
> cmpl $0, (%eax)
> - jnz 1f
> + jnz 2f
> mov 8+16(%esp), %eax /* syscall number */
> int $0x80
> safe_syscall_end:
> +
> /* code path for having successfully executed the syscall */
> - pop %ebx
> + cmp $-4095, %eax
> + jae 0f
> +
> +9: pop %ebx
> .cfi_remember_state
> .cfi_adjust_cfa_offset -4
> .cfi_restore ebx
> @@ -90,11 +94,18 @@ safe_syscall_end:
> .cfi_restore ebp
> ret
>
> -1:
> - /* code path when we didn't execute the syscall */
> .cfi_restore_state
> - mov $-TARGET_ERESTARTSYS, %eax
> - jmp safe_syscall_end
> +
> + /* code path setting errno */
> +0: neg %eax /* create positive errno */
> +1: mov 8+16(%esp), %ebx /* load errno pointer */
> + mov %eax, (%ebx) /* store errno */
> + mov $-1, %eax
> + jmp 9b
> +
> + /* code path when we didn't execute the syscall */
> +2: mov $TARGET_ERESTARTSYS, %eax
> + jmp 1b
> .cfi_endproc
>
> .size safe_syscall_base, .-safe_syscall_base
> diff --git a/common-user/host/ppc64/safe-syscall.inc.S
> b/common-user/host/ppc64/safe-syscall.inc.S
> index 875133173b..e35408c5fb 100644
> --- a/common-user/host/ppc64/safe-syscall.inc.S
> +++ b/common-user/host/ppc64/safe-syscall.inc.S
> @@ -17,14 +17,19 @@
>
> .text
>
> - /* This is the entry point for making a system call. The calling
> +#if _CALL_ELF == 2
> +#define PARAM_OFS 32
> +#else
> +#define PARAM_OFS 48
> +#endif
> +#define PARAM(X) PARAM_OFS + X*8
> +
> + /*
> + * This is the entry point for making a system call. The calling
> * convention here is that of a C varargs function with the
> * first argument an 'int *' to the signal_pending flag, the
> * second one the system call number (as a 'long'), and all further
> * arguments being syscall arguments (also 'long').
> - * We return a long which is the syscall's return value, which
> - * may be negative-errno on failure. Conversion to the
> - * -1-and-errno-set convention is done by the calling wrapper.
> */
> #if _CALL_ELF == 2
> safe_syscall_base:
> @@ -39,9 +44,11 @@ safe_syscall_base:
> .L.safe_syscall_base:
> .cfi_startproc
> #endif
> - /* We enter with r3 == *signal_pending
> - * r4 == syscall number
> - * r5 ... r10 == syscall arguments
> + /*
> + * We enter with r3 == &signal_pending
> + * r4 == &errno
> + * r5 == syscall number
> + * r6 ... r10, (stack) == syscall arguments
> * and return the result in r3
> * and the syscall instruction needs
> * r0 == syscall number
> @@ -49,18 +56,18 @@ safe_syscall_base:
> * and returns the result in r3
> * Shuffle everything around appropriately.
> */
> - std 14, 16(1) /* Preserve r14 in SP+16 */
> - .cfi_offset 14, 16
> - mr 14, 3 /* signal_pending */
> - mr 0, 4 /* syscall number */
> - mr 3, 5 /* syscall arguments */
> - mr 4, 6
> - mr 5, 7
> - mr 6, 8
> - mr 7, 9
> - mr 8, 10
> + mr 11, 3 /* signal_pending pointer */
> + std 4, PARAM(1)(1) /* save errno pointer in param slot */
> + mr 0, 5 /* syscall number */
> + mr 3, 6 /* syscall arguments */
> + mr 4, 7
> + mr 5, 8
> + mr 6, 9
> + mr 7, 10
> + ld 8, PARAM(8)(1)
>
> - /* This next sequence of code works in conjunction with the
> + /*
> + * This next sequence of code works in conjunction with the
> * rewind_if_safe_syscall_function(). If a signal is taken
> * and the interrupted PC is anywhere between 'safe_syscall_start'
> * and 'safe_syscall_end' then we rewind it to
> 'safe_syscall_start'.
> @@ -69,23 +76,25 @@ safe_syscall_base:
> */
> safe_syscall_start:
> /* if signal_pending is non-zero, don't do the call */
> - lwz 12, 0(14)
> + lwz 12, 0(11)
> cmpwi 0, 12, 0
> bne- 0f
> sc
> safe_syscall_end:
> - /* code path when we did execute the syscall */
> - ld 14, 16(1) /* restore r14 to its original value */
> - bnslr+
>
> - /* syscall failed; return negative errno */
> - neg 3, 3
> + /* code path for having successfully executed the syscall */
> + bnslr+ /* SO set for syscall error */
> +
> + /* code path setting errno */
> +1: ld 11, PARAM(1)(1) /* restore errno pointer */
> + stw 3, 0(11) /* store errno */
> + li 3, -1
> blr
>
> /* code path when we didn't execute the syscall */
> -0: addi 3, 0, -TARGET_ERESTARTSYS
> - ld 14, 16(1) /* restore r14 to its original value */
> - blr
> +0: li 3, TARGET_ERESTARTSYS
> + b 1b
> +
> .cfi_endproc
>
> #if _CALL_ELF == 2
> diff --git a/common-user/host/riscv/safe-syscall.inc.S
> b/common-user/host/riscv/safe-syscall.inc.S
> index 9ca3fbfd1e..eddede702b 100644
> --- a/common-user/host/riscv/safe-syscall.inc.S
> +++ b/common-user/host/riscv/safe-syscall.inc.S
> @@ -23,17 +23,15 @@
> * first argument an 'int *' to the signal_pending flag, the
> * second one the system call number (as a 'long'), and all further
> * arguments being syscall arguments (also 'long').
> - * We return a long which is the syscall's return value, which
> - * may be negative-errno on failure. Conversion to the
> - * -1-and-errno-set convention is done by the calling wrapper.
> */
> safe_syscall_base:
> .cfi_startproc
> /*
> * The syscall calling convention is nearly the same as C:
> - * we enter with a0 == *signal_pending
> - * a1 == syscall number
> - * a2 ... a7 == syscall arguments
> + * we enter with a0 == &signal_pending
> + * a1 == &errno
> + * a2 == syscall number
> + * a3 ... a7, [sp] == syscall arguments
> * and return the result in a0
> * and the syscall instruction needs
> * a7 == syscall number
> @@ -42,14 +40,19 @@ safe_syscall_base:
> * Shuffle everything around appropriately.
> */
> mv t0, a0 /* signal_pending pointer */
> - mv t1, a1 /* syscall number */
> - mv a0, a2 /* syscall arguments */
> - mv a1, a3
> - mv a2, a4
> - mv a3, a5
> - mv a4, a6
> - mv a5, a7
> - mv a7, t1
> + mv t1, a1 /* errno pointer */
> + mv t2, a2 /* syscall number */
> + mv a0, a3 /* syscall arguments */
> + mv a1, a4
> + mv a2, a5
> + mv a3, a6
> + mv a4, a7
> +#if __riscv_xlen == 32
> + lw a5, 0(sp)
> +#else
> + ld a5, 0(sp)
> +#endif
> + mv a7, t2
>
> /*
> * This next sequence of code works in conjunction with the
> @@ -61,17 +64,26 @@ safe_syscall_base:
> */
> safe_syscall_start:
> /* If signal_pending is non-zero, don't do the call */
> - lw t1, 0(t0)
> - bnez t1, 0f
> + lw t2, 0(t0)
> + bnez t2, 2f
> scall
> safe_syscall_end:
> +
> /* code path for having successfully executed the syscall */
> + li t2, -4096
> + bgtu a0, t2, 0f
> ret
>
> -0:
> - /* code path when we didn't execute the syscall */
> - li a0, -TARGET_ERESTARTSYS
> + /* code path setting errno */
> +0: neg a0, a0 /* create positive errno */
> +1: sw a0, 0(t1) /* store errno */
> + li a0, -1
> ret
> +
> + /* code path when we didn't execute the syscall */
> +2: li a0, TARGET_ERESTARTSYS
> + j 1b
> +
> .cfi_endproc
>
> .size safe_syscall_base, .-safe_syscall_base
> diff --git a/common-user/host/s390x/safe-syscall.inc.S
> b/common-user/host/s390x/safe-syscall.inc.S
> index 414b44ad38..f2a3bccc13 100644
> --- a/common-user/host/s390x/safe-syscall.inc.S
> +++ b/common-user/host/s390x/safe-syscall.inc.S
> @@ -15,14 +15,12 @@
> .global safe_syscall_end
> .type safe_syscall_base, @function
>
> - /* This is the entry point for making a system call. The calling
> + /*
> + * This is the entry point for making a system call. The calling
> * convention here is that of a C varargs function with the
> * first argument an 'int *' to the signal_pending flag, the
> * second one the system call number (as a 'long'), and all further
> * arguments being syscall arguments (also 'long').
> - * We return a long which is the syscall's return value, which
> - * may be negative-errno on failure. Conversion to the
> - * -1-and-errno-set convention is done by the calling wrapper.
> */
> safe_syscall_base:
> .cfi_startproc
> @@ -44,11 +42,13 @@ safe_syscall_base:
> stg %r1,0(%r15) /* store back chain */
> stg %r0,8(%r15) /* store eos */
>
> - /* The syscall calling convention isn't the same as the
> + /*
> + * The syscall calling convention isn't the same as the
> * C one:
> - * we enter with r2 == *signal_pending
> - * r3 == syscall number
> - * r4, r5, r6, (stack) == syscall arguments
> + * we enter with r2 == &signal_pending
> + * r3 == &errno
> + * r4 == syscall number
> + * r5, r6, (stack) == syscall arguments
> * and return the result in r2
> * and the syscall instruction needs
> * r1 == syscall number
> @@ -57,13 +57,14 @@ safe_syscall_base:
> * Shuffle everything around appropriately.
> */
> lgr %r8,%r2 /* signal_pending pointer */
> - lgr %r1,%r3 /* syscall number */
> - lgr %r2,%r4 /* syscall args */
> - lgr %r3,%r5
> - lgr %r4,%r6
> - lmg %r5,%r7,320(%r15)
> + lgr %r9,%r3 /* errno pointer */
> + lgr %r1,%r4 /* syscall number */
> + lgr %r2,%r5 /* syscall args */
> + lgr %r3,%r6
> + lmg %r4,%r7,320(%r15)
>
> - /* This next sequence of code works in conjunction with the
> + /*
> + * This next sequence of code works in conjunction with the
> * rewind_if_safe_syscall_function(). If a signal is taken
> * and the interrupted PC is anywhere between 'safe_syscall_start'
> * and 'safe_syscall_end' then we rewind it to
> 'safe_syscall_start'.
> @@ -73,18 +74,31 @@ safe_syscall_base:
> safe_syscall_start:
> /* if signal_pending is non-zero, don't do the call */
> icm %r0,15,0(%r8)
> - jne 2f
> + jne 1f
> svc 0
> safe_syscall_end:
>
> -1: lg %r15,0(%r15) /* load back chain */
> + /* code path for having successfully executed the syscall */
> + lghi %r0, -4095 /* check for syscall error */
> + clgr %r2, %r0
> + jgnl 0f
> +
> +9: lg %r15,0(%r15) /* load back chain */
> .cfi_remember_state
> .cfi_adjust_cfa_offset -160
> lmg %r6,%r15,48(%r15) /* load saved registers */
> br %r14
> .cfi_restore_state
> -2: lghi %r2, -TARGET_ERESTARTSYS
> - j 1b
> +
> + /* code path when we didn't execute the syscall */
> +1: lghi %r2, -TARGET_ERESTARTSYS
> +
> + /* code path setting errno */
> +0: lcr %r2, %r2 /* create positive errno */
> + st %r2, 0(%r9) /* store errno */
> + lghi %r2, -1
> + j 9b
> +
> .cfi_endproc
>
> .size safe_syscall_base, .-safe_syscall_base
> diff --git a/common-user/host/x86_64/safe-syscall.inc.S
> b/common-user/host/x86_64/safe-syscall.inc.S
> index f36992daa3..9a0c4c93b4 100644
> --- a/common-user/host/x86_64/safe-syscall.inc.S
> +++ b/common-user/host/x86_64/safe-syscall.inc.S
> @@ -14,18 +14,17 @@
> .global safe_syscall_end
> .type safe_syscall_base, @function
>
> - /* This is the entry point for making a system call. The calling
> + /*
> + * This is the entry point for making a system call. The calling
> * convention here is that of a C varargs function with the
> * first argument an 'int *' to the signal_pending flag, the
> * second one the system call number (as a 'long'), and all
> further
> * arguments being syscall arguments (also 'long').
> - * We return a long which is the syscall's return value, which
> - * may be negative-errno on failure. Conversion to the
> - * -1-and-errno-set convention is done by the calling wrapper.
> */
> safe_syscall_base:
> .cfi_startproc
> - /* This saves a frame pointer and aligns the stack for the
> syscall.
> + /*
> + * This saves a frame pointer and aligns the stack for the
> syscall.
> * (It's unclear if the syscall ABI has the same stack alignment
> * requirements as the userspace function call ABI, but better
> safe than
> * sorry. Appendix A2 of
> http://www.x86-64.org/documentation/abi.pdf
> @@ -35,11 +34,12 @@ safe_syscall_base:
> .cfi_adjust_cfa_offset 8
> .cfi_rel_offset rbp, 0
>
> - /* The syscall calling convention isn't the same as the
> - * C one:
> - * we enter with rdi == *signal_pending
> - * rsi == syscall number
> - * rdx, rcx, r8, r9, (stack), (stack) == syscall
> arguments
> + /*
> + * The syscall calling convention isn't the same as the C one:
> + * we enter with rdi == &signal_pending
> + * rsi == &errno
> + * rdx == syscall number
> + * rcx, r8, r9, (stack...) == syscall arguments
> * and return the result in rax
> * and the syscall instruction needs
> * rax == syscall number
> @@ -48,17 +48,19 @@ safe_syscall_base:
> * Shuffle everything around appropriately.
> * Note that syscall will trash rcx and r11.
> */
> - mov %rsi, %rax /* syscall number */
> - mov %rdi, %rbp /* signal_pending pointer */
> + mov %rdi, %r11 /* signal_pending pointer */
> + mov %rsi, %rbp /* errno pointer */
> + mov %rdx, %rax /* syscall number */
> /* and the syscall arguments */
> - mov %rdx, %rdi
> - mov %rcx, %rsi
> - mov %r8, %rdx
> - mov %r9, %r10
> - mov 16(%rsp), %r8
> - mov 24(%rsp), %r9
> + mov %rcx, %rdi
> + mov %r8, %rsi
> + mov %r9, %rdx
> + mov 16(%rsp), %r10
> + mov 24(%rsp), %r8
> + mov 32(%rsp), %r9
>
> - /* This next sequence of code works in conjunction with the
> + /*
> + * This next sequence of code works in conjunction with the
> * rewind_if_safe_syscall_function(). If a signal is taken
> * and the interrupted PC is anywhere between 'safe_syscall_start'
> * and 'safe_syscall_end' then we rewind it to
> 'safe_syscall_start'.
> @@ -67,25 +69,31 @@ safe_syscall_base:
> */
> safe_syscall_start:
> /* if signal_pending is non-zero, don't do the call */
> - cmpl $0, (%rbp)
> - jnz 1f
> + cmpl $0, (%r11)
> + jnz 2f
> syscall
> safe_syscall_end:
> +
> /* code path for having successfully executed the syscall */
> - pop %rbp
> + cmp $-4095, %rax
> + jae 0f
> +
> +9: pop %rbp
> .cfi_remember_state
> .cfi_def_cfa_offset 8
> .cfi_restore rbp
> ret
> -
> -1:
> - /* code path when we didn't execute the syscall */
> .cfi_restore_state
> - mov $-TARGET_ERESTARTSYS, %rax
> - pop %rbp
> - .cfi_def_cfa_offset 8
> - .cfi_restore rbp
> - ret
> - .cfi_endproc
>
> + /* code path setting errno */
> +0: neg %eax /* create positive errno */
> +1: mov %eax, (%rbp) /* store errno */
> + mov $-1, %rax
> + jmp 9b
> +
> + /* code path when we didn't execute the syscall */
> +2: mov $TARGET_ERESTARTSYS, %eax
> + jmp 1b
> +
> + .cfi_endproc
> .size safe_syscall_base, .-safe_syscall_base
> --
> 2.25.1
>
>
On Wed, 17 Nov 2021 at 16:12, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> The current api from safe_syscall_base() is to return -errno, which is
> the interface provided by *some* linux kernel abis. The wrapper macro,
> safe_syscall(), detects error, stores into errno, and returns -1, to
> match the api of the system syscall().
>
> For those kernel abis that do not return -errno natively, this leads
> to double syscall error detection. E.g. Linux ppc64, which sets the
> SO flag for error.
>
> Simplify the usage from C by moving the error detection into assembly.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> linux-user/safe-syscall.h | 20 +++---
> common-user/host/aarch64/safe-syscall.inc.S | 55 +++++++++-------
> common-user/host/arm/safe-syscall.inc.S | 58 ++++++++++-------
> common-user/host/i386/safe-syscall.inc.S | 51 +++++++++------
> common-user/host/ppc64/safe-syscall.inc.S | 63 +++++++++++--------
> common-user/host/riscv/safe-syscall.inc.S | 50 +++++++++------
> common-user/host/s390x/safe-syscall.inc.S | 50 +++++++++------
> common-user/host/x86_64/safe-syscall.inc.S | 70 ++++++++++++---------
> 8 files changed, 243 insertions(+), 174 deletions(-)
>
> diff --git a/linux-user/safe-syscall.h b/linux-user/safe-syscall.h
> index aaa9ffc0e2..ea0e8a8d24 100644
> --- a/linux-user/safe-syscall.h
> +++ b/linux-user/safe-syscall.h
> @@ -125,23 +125,17 @@
> * kinds of restartability.
> */
> #ifdef HAVE_SAFE_SYSCALL
> -/* The core part of this function is implemented in assembly */
> -extern long safe_syscall_base(int *pending, long number, ...);
> +
> +/* The core part of this function is implemented in assembly. */
> +extern long safe_syscall_base(int *pending, int *errnop, long number, ...);
> +
> /* These are defined by the safe-syscall.inc.S file */
> extern char safe_syscall_start[];
> extern char safe_syscall_end[];
>
> -#define safe_syscall(...) \
> - ({ \
> - long ret_; \
> - int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \
> - ret_ = safe_syscall_base(psp_, __VA_ARGS__); \
> - if (is_error(ret_)) { \
> - errno = -ret_; \
> - ret_ = -1; \
> - } \
> - ret_; \
> - })
> +#define safe_syscall(...) \
> + safe_syscall_base(&((TaskState *)thread_cpu->opaque)->signal_pending, \
> + &errno, __VA_ARGS__)
>
> #else
>
> diff --git a/common-user/host/aarch64/safe-syscall.inc.S b/common-user/host/aarch64/safe-syscall.inc.S
> index bc1f5a9792..95c60d8609 100644
> --- a/common-user/host/aarch64/safe-syscall.inc.S
> +++ b/common-user/host/aarch64/safe-syscall.inc.S
> @@ -17,22 +17,21 @@
> .type safe_syscall_start, #function
> .type safe_syscall_end, #function
>
> - /* This is the entry point for making a system call. The calling
> + /*
> + * This is the entry point for making a system call. The calling
> * convention here is that of a C varargs function with the
> * first argument an 'int *' to the signal_pending flag, the
> * second one the system call number (as a 'long'), and all further
> * arguments being syscall arguments (also 'long').
This comment text needs updating to mention the new errnop argument.
(Applies to all the similar comments in the files for the other archs.)
> - * We return a long which is the syscall's return value, which
> - * may be negative-errno on failure. Conversion to the
> - * -1-and-errno-set convention is done by the calling wrapper.
> */
> safe_syscall_base:
> .cfi_startproc
> - /* The syscall calling convention isn't the same as the
> - * C one:
> + /*
> + * The syscall calling convention isn't the same as the C one:
Looks like the indent here is wrong ?
> * we enter with x0 == *signal_pending
> - * x1 == syscall number
> - * x2 ... x7, (stack) == syscall arguments
> + * x1 == errno
"int* address of errno"
> + * x2 == syscall number
> + * x3 ... x7, (stack) == syscall arguments
> * and return the result in x0
> * and the syscall instruction needs
> * x8 == syscall number
> @@ -40,17 +39,18 @@ safe_syscall_base:
> * and returns the result in x0
> * Shuffle everything around appropriately.
> */
> - mov x9, x0 /* signal_pending pointer */
> - mov x8, x1 /* syscall number */
> - mov x0, x2 /* syscall arguments */
> - mov x1, x3
> - mov x2, x4
> - mov x3, x5
> - mov x4, x6
> - mov x5, x7
> - ldr x6, [sp]
> + mov x10, x0 /* signal_pending pointer */
> + mov x11, x1 /* errno pointer */
> + mov x8, x2 /* syscall number */
> + mov x0, x3 /* syscall arguments */
> + mov x1, x4
> + mov x2, x5
> + mov x3, x6
> + mov x4, x7
> + ldp x5, x6, [sp]
>
> - /* This next sequence of code works in conjunction with the
> + /*
> + * This next sequence of code works in conjunction with the
> * rewind_if_safe_syscall_function(). If a signal is taken
> * and the interrupted PC is anywhere between 'safe_syscall_start'
> * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
> @@ -59,17 +59,26 @@ safe_syscall_base:
> */
> safe_syscall_start:
> /* if signal_pending is non-zero, don't do the call */
> - ldr w10, [x9]
> - cbnz w10, 0f
> + ldr w9, [x10]
> + cbnz w9, 2f
> svc 0x0
> safe_syscall_end:
> +
> /* code path for having successfully executed the syscall */
> + cmn x0, #4095
> + b.cs 1f
Shouldn't this be going to label 0f ? We need to do the 'neg',
and unless I'm misreading the diff there's currently no path
of execution that gets to that.
Alternatively, branch on the opposite-sense condition to the
'ret' after the set-errno stuff.
> ret
>
> -0:
> - /* code path when we didn't execute the syscall */
> - mov x0, #-TARGET_ERESTARTSYS
> + /* code path setting errno */
> +0: neg w0, w0 /* create positive errno */
> +1: str w0, [x11] /* store errno */
> + mov x0, #-1
> ret
> +
> + /* code path when we didn't execute the syscall */
> +2: mov w0, #TARGET_ERESTARTSYS
> + b 1b
> +
> .cfi_endproc
>
> .size safe_syscall_base, .-safe_syscall_base
> diff --git a/common-user/host/arm/safe-syscall.inc.S b/common-user/host/arm/safe-syscall.inc.S
> index 88c4958504..17839c6486 100644
> --- a/common-user/host/arm/safe-syscall.inc.S
> +++ b/common-user/host/arm/safe-syscall.inc.S
> @@ -22,33 +22,35 @@
> .arm
> .align 2
>
> - /* This is the entry point for making a system call. The calling
> + /*
> + * This is the entry point for making a system call. The calling
> * convention here is that of a C varargs function with the
> * first argument an 'int *' to the signal_pending flag, the
> * second one the system call number (as a 'long'), and all further
> * arguments being syscall arguments (also 'long').
> - * We return a long which is the syscall's return value, which
> - * may be negative-errno on failure. Conversion to the
> - * -1-and-errno-set convention is done by the calling wrapper.
> */
> safe_syscall_base:
> .fnstart
> .cfi_startproc
> mov r12, sp /* save entry stack */
> - push { r4, r5, r6, r7, r8, lr }
> - .save { r4, r5, r6, r7, r8, lr }
> - .cfi_adjust_cfa_offset 24
> + push { r4, r5, r6, r7, r8, r9, r10, lr }
> + .save { r4, r5, r6, r7, r8, r9, r10, lr }
> + .cfi_adjust_cfa_offset 32
> .cfi_rel_offset r4, 0
> .cfi_rel_offset r5, 4
> .cfi_rel_offset r6, 8
> .cfi_rel_offset r7, 12
> .cfi_rel_offset r8, 16
> - .cfi_rel_offset lr, 20
> + .cfi_rel_offset r9, 20
> + .cfi_rel_offset r10, 24
> + .cfi_rel_offset lr, 28
>
> - /* The syscall calling convention isn't the same as the C one:
> - * we enter with r0 == *signal_pending
> - * r1 == syscall number
> - * r2, r3, [sp+0] ... [sp+12] == syscall arguments
> + /*
> + * The syscall calling convention isn't the same as the C one:
> + * we enter with r0 == &signal_pending
> + * r1 == &errno
Odd indent ?
> + * r2 == syscall number
> + * r3, [sp+0] ... [sp+16] == syscall arguments
> * and return the result in r0
Don't we wind up with a potential issue here with 64-bit arguments
due to the calling convention wanting to put those in aligned
memory/register locations? Previously because we had just two
extra arguments the arguments started at r2 and had the same
alignment behaviour as the syscall wants for them starting at
r0; but now we start at r3 so if for instance the first argument
is 64-bit it will be in [sp+0][sp+4] but should go in r0:r1
I think...
(Stopped reviewing here because if we need to change the
way we call these functions there's no point my reviewing
the fine detail of the asm.)
-- PMM
On 11/22/21 12:55 PM, Peter Maydell wrote:
>> - /* This is the entry point for making a system call. The calling
>> + /*
>> + * This is the entry point for making a system call. The calling
>> * convention here is that of a C varargs function with the
>> * first argument an 'int *' to the signal_pending flag, the
>> * second one the system call number (as a 'long'), and all further
>> * arguments being syscall arguments (also 'long').
>
> This comment text needs updating to mention the new errnop argument.
> (Applies to all the similar comments in the files for the other archs.)
Yep.
>> + /*
>> + * The syscall calling convention isn't the same as the C one:
>
> Looks like the indent here is wrong ?
Irritatingly, these files are a mix of tabs/spaces.
>> * we enter with x0 == *signal_pending
>> - * x1 == syscall number
>> - * x2 ... x7, (stack) == syscall arguments
>> + * x1 == errno
>
> "int* address of errno"
Arg, fixed some of these, but clearly. not all.
>> /* code path for having successfully executed the syscall */
>> + cmn x0, #4095
>> + b.cs 1f
>
> Shouldn't this be going to label 0f ? We need to do the 'neg',
> and unless I'm misreading the diff there's currently no path
> of execution that gets to that.
Oops, rebase error, where the fix landed in the next patch.
>> + * r2 == syscall number
>> + * r3, [sp+0] ... [sp+16] == syscall arguments
>> * and return the result in r0
>
> Don't we wind up with a potential issue here with 64-bit arguments
> due to the calling convention wanting to put those in aligned
> memory/register locations? Previously because we had just two
> extra arguments the arguments started at r2 and had the same
> alignment behaviour as the syscall wants for them starting at
> r0; but now we start at r3 so if for instance the first argument
> is 64-bit it will be in [sp+0][sp+4] but should go in r0:r1
> I think...
>
> (Stopped reviewing here because if we need to change the
> way we call these functions there's no point my reviewing
> the fine detail of the asm.)
Oof. I missed that detail. Yes, that is a problem (I think arm is the only such
supported host). I think the best solution would be to *not* pass in &errno, but to have
the assembly tail-call to
long safe_syscall_errno_tail(int value)
{
errno = value;
return -1;
}
Which is probably more efficient in any case. I'll re-work this.
r~
On Mon, 22 Nov 2021 at 12:21, Richard Henderson <richard.henderson@linaro.org> wrote: > > On 11/22/21 12:55 PM, Peter Maydell wrote: > >> + /* > >> + * The syscall calling convention isn't the same as the C one: > > > > Looks like the indent here is wrong ? > > Irritatingly, these files are a mix of tabs/spaces. Hmm, so they are; I wonder how we let that slip in. Maybe do a set of preparatory patches doing a mechanical tab-to-space conversion? -- PMM
© 2016 - 2026 Red Hat, Inc.