[PATCH v11 2/2] ptrace,syscall_user_dispatch: checkpoint/restore support for SUD

Gregory Price posted 2 patches 2 years, 6 months ago
There is a newer version of this series
[PATCH v11 2/2] ptrace,syscall_user_dispatch: checkpoint/restore support for SUD
Posted by Gregory Price 2 years, 6 months ago
Implement ptrace getter/setter interface for syscall user dispatch.

These prctl settings are presently write-only, making it impossible to
implement transparent checkpoint/restore via software like CRIU.

'on_dispatch' field is not exposed because it is a kernel-internal
only field that cannot be 'true' when returning to userland.

Signed-off-by: Gregory Price <gregory.price@memverge.com>
---
 .../admin-guide/syscall-user-dispatch.rst     |  4 +
 include/linux/syscall_user_dispatch.h         | 18 +++++
 include/uapi/linux/ptrace.h                   | 30 ++++++++
 kernel/entry/syscall_user_dispatch.c          | 40 ++++++++++
 kernel/ptrace.c                               |  9 +++
 tools/testing/selftests/ptrace/.gitignore     |  1 +
 tools/testing/selftests/ptrace/Makefile       |  2 +-
 tools/testing/selftests/ptrace/get_set_sud.c  | 77 +++++++++++++++++++
 8 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/ptrace/get_set_sud.c

diff --git a/Documentation/admin-guide/syscall-user-dispatch.rst b/Documentation/admin-guide/syscall-user-dispatch.rst
index 60314953c728..f7648c08297e 100644
--- a/Documentation/admin-guide/syscall-user-dispatch.rst
+++ b/Documentation/admin-guide/syscall-user-dispatch.rst
@@ -73,6 +73,10 @@ thread-wide, without the need to invoke the kernel directly.  selector
 can be set to SYSCALL_DISPATCH_FILTER_ALLOW or SYSCALL_DISPATCH_FILTER_BLOCK.
 Any other value should terminate the program with a SIGSYS.
 
+Additionally, a task's syscall user dispatch configuration can be peeked
+and poked via the PTRACE_(GET|SET)_SYSCALL_USER_DISPATCH_CONFIG ptrace
+requests. This is useful for checkpoint/restart software.
+
 Security Notes
 --------------
 
diff --git a/include/linux/syscall_user_dispatch.h b/include/linux/syscall_user_dispatch.h
index a0ae443fb7df..641ca8880995 100644
--- a/include/linux/syscall_user_dispatch.h
+++ b/include/linux/syscall_user_dispatch.h
@@ -22,6 +22,12 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
 #define clear_syscall_work_syscall_user_dispatch(tsk) \
 	clear_task_syscall_work(tsk, SYSCALL_USER_DISPATCH)
 
+int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
+				     void __user *data);
+
+int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long size,
+				     void __user *data);
+
 #else
 struct syscall_user_dispatch {};
 
@@ -35,6 +41,18 @@ static inline void clear_syscall_work_syscall_user_dispatch(struct task_struct *
 {
 }
 
+static inline int syscall_user_dispatch_get_config(struct task_struct *task,
+						   unsigned long size, void __user *data)
+{
+	return -EINVAL;
+}
+
+static inline int syscall_user_dispatch_set_config(struct task_struct *task,
+						   unsigned long size, void __user *data)
+{
+	return -EINVAL;
+}
+
 #endif /* CONFIG_GENERIC_ENTRY */
 
 #endif /* _SYSCALL_USER_DISPATCH_H */
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 195ae64a8c87..033f4db75c60 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -112,6 +112,36 @@ struct ptrace_rseq_configuration {
 	__u32 pad;
 };
 
+#define PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG 0x4210
+#define PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG 0x4211
+
+/*
+ * struct ptrace_sud_config - Per-task configuration for SUD
+ * @mode:	One of PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF
+ * @selector:	Tracee's user virtual address of SUD selector
+ * @offset:	SUD exclusion area (virtual address)
+ * @len:	Length of SUD exclusion area
+ *
+ * Used to get/set the syscall user dispatch configuration for tracee.
+ * process.  Selector is optional (may be NULL), and if invalid will produce
+ * a SIGSEGV in the tracee upon first access.
+ *
+ * If mode is PR_SYS_DISPATCH_ON, syscall dispatch will be enabled. If
+ * PR_SYS_DISPATCH_OFF, syscall dispatch will be disabled and all other
+ * parameters must be 0.  The value in *selector (if not null), also determines
+ * whether syscall dispatch will occur.
+ *
+ * The SUD Exclusion area described by offset/len is the virtual address space
+ * from which syscalls will not produce a user dispatch.
+ */
+struct ptrace_sud_config {
+	__u8  mode;
+	__u8  pad[7];
+	__u64 selector;
+	__u64 offset;
+	__u64 len;
+};
+
 /*
  * These values are stored in task->ptrace_message
  * by ptrace_stop to describe the current syscall-stop.
diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
index 22396b234854..08e8b377557f 100644
--- a/kernel/entry/syscall_user_dispatch.c
+++ b/kernel/entry/syscall_user_dispatch.c
@@ -4,6 +4,7 @@
  */
 #include <linux/sched.h>
 #include <linux/prctl.h>
+#include <linux/ptrace.h>
 #include <linux/syscall_user_dispatch.h>
 #include <linux/uaccess.h>
 #include <linux/signal.h>
@@ -113,3 +114,42 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
 {
 	return task_set_syscall_user_dispatch(current, mode, offset, len, selector);
 }
+
+int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
+		                     void __user *data)
+{
+	struct syscall_user_dispatch *sd = &task->syscall_dispatch;
+	struct ptrace_sud_config config;
+	if (size != sizeof(struct ptrace_sud_config))
+		return -EINVAL;
+
+	if (test_task_syscall_work(task, SYSCALL_USER_DISPATCH))
+		config.mode = PR_SYS_DISPATCH_ON;
+	else
+		config.mode = PR_SYS_DISPATCH_OFF;
+
+	config.offset = sd->offset;
+	config.len = sd->len;
+	config.selector = (__u64)sd->selector;
+
+	if (copy_to_user(data, &config, sizeof(config))) {
+		return -EFAULT;
+	}
+	return 0;
+}
+
+int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long size,
+		                     void __user *data)
+{
+	int rc;
+	struct ptrace_sud_config cfg;
+	if (size != sizeof(struct ptrace_sud_config))
+		return -EINVAL;
+
+	if (copy_from_user(&cfg, data, sizeof(cfg))) {
+		return -EFAULT;
+	}
+	rc = task_set_syscall_user_dispatch(task, cfg.mode, cfg.offset,
+					    cfg.len, (void __user*)cfg.selector);
+	return rc;
+}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 54482193e1ed..d99376532b56 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -32,6 +32,7 @@
 #include <linux/compat.h>
 #include <linux/sched/signal.h>
 #include <linux/minmax.h>
+#include <linux/syscall_user_dispatch.h>
 
 #include <asm/syscall.h>	/* for syscall_get_* */
 
@@ -1259,6 +1260,14 @@ int ptrace_request(struct task_struct *child, long request,
 		break;
 #endif
 
+	case PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG:
+		ret = syscall_user_dispatch_set_config(child, addr, datavp);
+		break;
+
+	case PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG:
+		ret = syscall_user_dispatch_get_config(child, addr, datavp);
+		break;
+
 	default:
 		break;
 	}
diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore
index 792318aaa30c..b7dde152e75a 100644
--- a/tools/testing/selftests/ptrace/.gitignore
+++ b/tools/testing/selftests/ptrace/.gitignore
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 get_syscall_info
+get_set_sud
 peeksiginfo
 vmaccess
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
index 2f1f532c39db..33a36b73bcb9 100644
--- a/tools/testing/selftests/ptrace/Makefile
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 CFLAGS += -std=c99 -pthread -iquote../../../../include/uapi -Wall
 
-TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess
+TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess get_set_sud
 
 include ../lib.mk
diff --git a/tools/testing/selftests/ptrace/get_set_sud.c b/tools/testing/selftests/ptrace/get_set_sud.c
new file mode 100644
index 000000000000..c4e7b87cab03
--- /dev/null
+++ b/tools/testing/selftests/ptrace/get_set_sud.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include "../kselftest_harness.h"
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+
+#include "linux/ptrace.h"
+
+static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
+{
+	return syscall(SYS_ptrace, request, pid, addr, data);
+}
+
+TEST(get_set_sud)
+{
+	struct ptrace_sud_config config;
+	pid_t child;
+	int ret = 0;
+	int status;
+
+	child = fork();
+	ASSERT_GE(child, 0);
+	if (child == 0) {
+		ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+			TH_LOG("PTRACE_TRACEME: %m");
+		}
+		kill(getpid(), SIGSTOP);
+		_exit(1);
+	}
+
+	waitpid(child, &status, 0);
+
+	memset(&config, 0xff, sizeof(config));
+	config.mode = PR_SYS_DISPATCH_ON;
+
+	ret = sys_ptrace(PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG, child,
+			 (void*)sizeof(config), &config);
+	if (ret < 0) {
+		ASSERT_EQ(errno, EIO);
+		goto leave;
+	}
+
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(config.mode, PR_SYS_DISPATCH_OFF);
+	ASSERT_EQ(config.selector, 0);
+	ASSERT_EQ(config.offset, 0);
+	ASSERT_EQ(config.len, 0);
+
+	config.mode = PR_SYS_DISPATCH_ON;
+	config.selector = 0;
+	config.offset = 0x400000;
+	config.len = 0x1000;
+
+	ret = sys_ptrace(PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG, child,
+			 (void*)sizeof(config), &config);
+
+	ASSERT_EQ(ret, 0);
+
+	memset(&config, 1, sizeof(config));
+	ret = sys_ptrace(PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG, child,
+			 (void*)sizeof(config), &config);
+
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(config.mode, PR_SYS_DISPATCH_ON);
+	ASSERT_EQ(config.selector, 0);
+	ASSERT_EQ(config.offset, 0x400000);
+	ASSERT_EQ(config.len, 0x1000);
+
+leave:
+	kill(child, SIGKILL);
+}
+
+TEST_HARNESS_MAIN
-- 
2.39.1
Re: [PATCH v11 2/2] ptrace,syscall_user_dispatch: checkpoint/restore support for SUD
Posted by Andrei Vagin 2 years, 6 months ago
On Tue, Feb 21, 2023 at 12:18 PM Gregory Price
<gourry.memverge@gmail.com> wrote:

...

> diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
> index 22396b234854..08e8b377557f 100644
> --- a/kernel/entry/syscall_user_dispatch.c
> +++ b/kernel/entry/syscall_user_dispatch.c
> @@ -4,6 +4,7 @@
>   */
>  #include <linux/sched.h>
>  #include <linux/prctl.h>
> +#include <linux/ptrace.h>
>  #include <linux/syscall_user_dispatch.h>
>  #include <linux/uaccess.h>
>  #include <linux/signal.h>
> @@ -113,3 +114,42 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
>  {
>         return task_set_syscall_user_dispatch(current, mode, offset, len, selector);
>  }
> +
> +int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
> +                                    void __user *data)
> +{
> +       struct syscall_user_dispatch *sd = &task->syscall_dispatch;
> +       struct ptrace_sud_config config;

WARNING: Missing a blank line after declarations

You need to verify all patches with ./scripts/checkpatch.pl. Here are
a few other warnings.

> +       if (size != sizeof(struct ptrace_sud_config))
> +               return -EINVAL;
> +

config has to be fully initialized otherwise it leaks data from a kernel stack.

> +       if (test_task_syscall_work(task, SYSCALL_USER_DISPATCH))
> +               config.mode = PR_SYS_DISPATCH_ON;
> +       else
> +               config.mode = PR_SYS_DISPATCH_OFF;
> +
> +       config.offset = sd->offset;
> +       config.len = sd->len;
> +       config.selector = (__u64)sd->selector;
> +
> +       if (copy_to_user(data, &config, sizeof(config))) {
> +               return -EFAULT;
> +       }
> +       return 0;
> +}
> +
> +int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long size,
> +                                    void __user *data)
> +{
> +       int rc;
> +       struct ptrace_sud_config cfg;
> +       if (size != sizeof(struct ptrace_sud_config))
> +               return -EINVAL;
> +
> +       if (copy_from_user(&cfg, data, sizeof(cfg))) {
> +               return -EFAULT;
> +       }
> +       rc = task_set_syscall_user_dispatch(task, cfg.mode, cfg.offset,
> +                                           cfg.len, (void __user*)cfg.selector);
> +       return rc;
> +}
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index 54482193e1ed..d99376532b56 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -32,6 +32,7 @@
>  #include <linux/compat.h>
>  #include <linux/sched/signal.h>
>  #include <linux/minmax.h>
> +#include <linux/syscall_user_dispatch.h>
>
>  #include <asm/syscall.h>       /* for syscall_get_* */
>
> @@ -1259,6 +1260,14 @@ int ptrace_request(struct task_struct *child, long request,
>                 break;
>  #endif
>
> +       case PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG:
> +               ret = syscall_user_dispatch_set_config(child, addr, datavp);
> +               break;
> +
> +       case PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG:
> +               ret = syscall_user_dispatch_get_config(child, addr, datavp);
> +               break;
> +
>         default:
>                 break;
>         }
> diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore
> index 792318aaa30c..b7dde152e75a 100644
> --- a/tools/testing/selftests/ptrace/.gitignore
> +++ b/tools/testing/selftests/ptrace/.gitignore
> @@ -1,4 +1,5 @@
>  # SPDX-License-Identifier: GPL-2.0-only
>  get_syscall_info
> +get_set_sud
>  peeksiginfo
>  vmaccess
> diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
> index 2f1f532c39db..33a36b73bcb9 100644
> --- a/tools/testing/selftests/ptrace/Makefile
> +++ b/tools/testing/selftests/ptrace/Makefile
> @@ -1,6 +1,6 @@
>  # SPDX-License-Identifier: GPL-2.0-only
>  CFLAGS += -std=c99 -pthread -iquote../../../../include/uapi -Wall
>
> -TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess
> +TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess get_set_sud
>
>  include ../lib.mk
> diff --git a/tools/testing/selftests/ptrace/get_set_sud.c b/tools/testing/selftests/ptrace/get_set_sud.c

I think the test has to be in a separate patch.

> new file mode 100644
> index 000000000000..c4e7b87cab03
> --- /dev/null
> +++ b/tools/testing/selftests/ptrace/get_set_sud.c
> @@ -0,0 +1,77 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#define _GNU_SOURCE
> +#include "../kselftest_harness.h"
> +#include <stdio.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <sys/wait.h>
> +#include <sys/syscall.h>
> +#include <sys/prctl.h>
> +
> +#include "linux/ptrace.h"
> +
> +static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
> +{
> +       return syscall(SYS_ptrace, request, pid, addr, data);
> +}
> +
> +TEST(get_set_sud)
> +{
> +       struct ptrace_sud_config config;
> +       pid_t child;
> +       int ret = 0;
> +       int status;
> +
> +       child = fork();
> +       ASSERT_GE(child, 0);
> +       if (child == 0) {
> +               ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) {
> +                       TH_LOG("PTRACE_TRACEME: %m");
> +               }
> +               kill(getpid(), SIGSTOP);
> +               _exit(1);
> +       }
> +
> +       waitpid(child, &status, 0);
> +
> +       memset(&config, 0xff, sizeof(config));
> +       config.mode = PR_SYS_DISPATCH_ON;
> +
> +       ret = sys_ptrace(PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG, child,
> +                        (void*)sizeof(config), &config);
> +       if (ret < 0) {
> +               ASSERT_EQ(errno, EIO);

When do we expect to get EIO here?

> +               goto leave;
> +       }
> +
> +       ASSERT_EQ(ret, 0);
> +       ASSERT_EQ(config.mode, PR_SYS_DISPATCH_OFF);
> +       ASSERT_EQ(config.selector, 0);
> +       ASSERT_EQ(config.offset, 0);
> +       ASSERT_EQ(config.len, 0);
> +
> +       config.mode = PR_SYS_DISPATCH_ON;
> +       config.selector = 0;
> +       config.offset = 0x400000;
> +       config.len = 0x1000;
> +
> +       ret = sys_ptrace(PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG, child,
> +                        (void*)sizeof(config), &config);
> +
> +       ASSERT_EQ(ret, 0);
> +
> +       memset(&config, 1, sizeof(config));
> +       ret = sys_ptrace(PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG, child,
> +                        (void*)sizeof(config), &config);
> +
> +       ASSERT_EQ(ret, 0);
> +       ASSERT_EQ(config.mode, PR_SYS_DISPATCH_ON);
> +       ASSERT_EQ(config.selector, 0);
> +       ASSERT_EQ(config.offset, 0x400000);
> +       ASSERT_EQ(config.len, 0x1000);
> +
> +leave:
> +       kill(child, SIGKILL);
> +}
> +
> +TEST_HARNESS_MAIN
> --
> 2.39.1
>
Re: [PATCH v11 2/2] ptrace,syscall_user_dispatch: checkpoint/restore support for SUD
Posted by Gregory Price 2 years, 6 months ago
On Fri, Feb 24, 2023 at 12:17:56AM -0800, Andrei Vagin wrote:
> WARNING: Missing a blank line after declarations
> 
> You need to verify all patches with ./scripts/checkpatch.pl. Here are
> a few other warnings.
>

I completely neglected this, aye aye

> > diff --git a/tools/testing/selftests/ptrace/get_set_sud.c b/tools/testing/selftests/ptrace/get_set_sud.c
> 
> I think the test has to be in a separate patch.
> 

will split

> > +       if (ret < 0) {
> > +               ASSERT_EQ(errno, EIO);
> 
> When do we expect to get EIO here?
> 

artifact from an old version i never dropped, had to do with my include
paths being messed up and not being able to include linux/ptrace.h for
some reason.  Will drop

~Gregory
Re: [PATCH v11 2/2] ptrace,syscall_user_dispatch: checkpoint/restore support for SUD
Posted by Oleg Nesterov 2 years, 6 months ago
On 02/21, Gregory Price wrote:
>
> +struct ptrace_sud_config {
> +	__u8  mode;
> +	__u8  pad[7];
              ^^^^^^
Why?

> +int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
> +		                     void __user *data)
> +{
> +	struct syscall_user_dispatch *sd = &task->syscall_dispatch;
> +	struct ptrace_sud_config config;
> +	if (size != sizeof(struct ptrace_sud_config))
> +		return -EINVAL;

Andrei, do we really need this check?

> +
> +	if (test_task_syscall_work(task, SYSCALL_USER_DISPATCH))
> +		config.mode = PR_SYS_DISPATCH_ON;
> +	else
> +		config.mode = PR_SYS_DISPATCH_OFF;
> +
> +	config.offset = sd->offset;
> +	config.len = sd->len;
> +	config.selector = (__u64)sd->selector;

As the kernel test robot reports, this is not -Wpointer-to-int-cast friendly.
Please use uintptr_t. See for example ptrace_get_rseq_configuration(). Same
for syscall_user_dispatch_set_config().

> +	if (copy_to_user(data, &config, sizeof(config))) {

This leaks info in (uninitialized) config.pad[]. You can probably simply make
config.mode __u64 as well.

Minor, but sizeof(struct ptrace_sud_config) above vs this sizeof(config)) doesn't
look consistent to me...

> +static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
> +{
> +	return syscall(SYS_ptrace, request, pid, addr, data);
> +}

Why can't you simply use ptrace() ?

Oleg.
Re: [PATCH v11 2/2] ptrace,syscall_user_dispatch: checkpoint/restore support for SUD
Posted by Gregory Price 2 years, 6 months ago
On Wed, Feb 22, 2023 at 01:48:35PM +0100, Oleg Nesterov wrote:
> On 02/21, Gregory Price wrote:
> >
> > +struct ptrace_sud_config {
> > +	__u8  mode;
> > +	__u8  pad[7];
>               ^^^^^^
> Why?
> 

The struct isn't packed, so this is for alignment/consistency of size.
The padding gets added anyway, and differently between 32/64 bit.
Without padding, allocating this in 32-bit mode creates a structure of
size 28 (4-byte aligned), while in 64-bit mode it creates a structure of
size 32 (8-byte aligned).

ptrace_syscall_info in the same file has the same thing.

> > +int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
> > +		                     void __user *data)
> > +{
> > +	struct syscall_user_dispatch *sd = &task->syscall_dispatch;
> > +	struct ptrace_sud_config config;
> > +	if (size != sizeof(struct ptrace_sud_config))
> > +		return -EINVAL;
> 
> Andrei, do we really need this check?
> 

My understanding is that it's a sanity check against the above issue.
In fact, it was what lead me to add the padding.

Without the padding, sizeof(ptrace_sud_config) will be 32b in the kernel
and 28b in userland.

> > +
> > +	if (test_task_syscall_work(task, SYSCALL_USER_DISPATCH))
> > +		config.mode = PR_SYS_DISPATCH_ON;
> > +	else
> > +		config.mode = PR_SYS_DISPATCH_OFF;
> > +
> > +	config.offset = sd->offset;
> > +	config.len = sd->len;
> > +	config.selector = (__u64)sd->selector;
> 
> As the kernel test robot reports, this is not -Wpointer-to-int-cast friendly.
> Please use uintptr_t. See for example ptrace_get_rseq_configuration(). Same
> for syscall_user_dispatch_set_config().
> 

.rseq_abi_pointer = (u64)(uintptr_t)task->rseq,

aye aye. I saw the error yesterday, I need to change my compile settings.

> > +	if (copy_to_user(data, &config, sizeof(config))) {
> 
> This leaks info in (uninitialized) config.pad[]. You can probably simply make
> config.mode __u64 as well.
> 
> Minor, but sizeof(struct ptrace_sud_config) above vs this sizeof(config)) doesn't
> look consistent to me...
> 

I hadn't considered uninit data. It's technically a __u32, but it's
probably just cleaner to promote/cast here than deal with padding.

> > +static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
> > +{
> > +	return syscall(SYS_ptrace, request, pid, addr, data);
> > +}
> 
> Why can't you simply use ptrace() ?
> 
> Oleg.
> 

ptrace() is the libc wrapper around the syscall.

I would assume there are issues with #include <sys/ptrace.h> and
#include <linux/ptrace.h> in the same file. Since libc doesn't have the
new definitions.

Not sure if there's any stipulations around how selftests have to be
written, i just wrote this one based on the surrounding tests and got
it to work.  I would think direct usage of the syscall is preferred,
but i'm ignorant here.




I'll make some changes and give it a few days before shipping another
patch.

~Gregory
Re: [PATCH v11 2/2] ptrace,syscall_user_dispatch: checkpoint/restore support for SUD
Posted by Oleg Nesterov 2 years, 6 months ago
On 02/22, Gregory Price wrote:
>
> On Wed, Feb 22, 2023 at 01:48:35PM +0100, Oleg Nesterov wrote:
> > On 02/21, Gregory Price wrote:
> > >
> > > +struct ptrace_sud_config {
> > > +	__u8  mode;
> > > +	__u8  pad[7];
> >               ^^^^^^
> > Why?
> >
>
> The struct isn't packed, so this is for alignment/consistency of size.
> The padding gets added anyway, and differently between 32/64 bit.

OK, I have to admit I didn't know that alignof(long long) == 4 on 32 bit.

> > > +int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
> > > +		                     void __user *data)
> > > +{
> > > +	struct syscall_user_dispatch *sd = &task->syscall_dispatch;
> > > +	struct ptrace_sud_config config;
> > > +	if (size != sizeof(struct ptrace_sud_config))
> > > +		return -EINVAL;
> >
> > Andrei, do we really need this check?
> >
>
> My understanding is that it's a sanity check against the above issue.
> In fact, it was what lead me to add the padding.

Well, if this is the only reason then this check and the "size" argument
ahould be removed, imo.

But perhaps it can be useful for future extensions, I dunno.

Oleg.
Re: [PATCH v11 2/2] ptrace,syscall_user_dispatch: checkpoint/restore support for SUD
Posted by Gregory Price 2 years, 6 months ago
On Thu, Feb 23, 2023 at 01:30:20PM +0100, Oleg Nesterov wrote:
> On 02/22, Gregory Price wrote:
> >
> > On Wed, Feb 22, 2023 at 01:48:35PM +0100, Oleg Nesterov wrote:
> > > On 02/21, Gregory Price wrote:
> > > >
> > > > +struct ptrace_sud_config {
> > > > +	__u8  mode;
> > > > +	__u8  pad[7];
> > >               ^^^^^^
> > > Why?
> > >
> >
> > The struct isn't packed, so this is for alignment/consistency of size.
> > The padding gets added anyway, and differently between 32/64 bit.
> 
> OK, I have to admit I didn't know that alignof(long long) == 4 on 32 bit.
> 
> > > > +int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
> > > > +		                     void __user *data)
> > > > +{
> > > > +	struct syscall_user_dispatch *sd = &task->syscall_dispatch;
> > > > +	struct ptrace_sud_config config;
> > > > +	if (size != sizeof(struct ptrace_sud_config))
> > > > +		return -EINVAL;
> > >
> > > Andrei, do we really need this check?
> > >
> >
> > My understanding is that it's a sanity check against the above issue.
> > In fact, it was what lead me to add the padding.
> 
> Well, if this is the only reason then this check and the "size" argument
> ahould be removed, imo.
> 
> But perhaps it can be useful for future extensions, I dunno.
> 
> Oleg.
>

I suppose yes it could also be used to detect differences in versioning
if the struct changes in the future, and that would not require an API
change in the future to support it.

If something does change in the future, without the check you're kinda
SOL trying to add new fields.

~Gregory
Re: [PATCH v11 2/2] ptrace,syscall_user_dispatch: checkpoint/restore support for SUD
Posted by Oleg Nesterov 2 years, 6 months ago
On 02/23, Gregory Price wrote:
>
> On Thu, Feb 23, 2023 at 01:30:20PM +0100, Oleg Nesterov wrote:
> >
> > Well, if this is the only reason then this check and the "size" argument
> > ahould be removed, imo.
> >
> > But perhaps it can be useful for future extensions, I dunno.
> >
> > Oleg.
> >
>
> I suppose yes it could also be used to detect differences in versioning
> if the struct changes in the future, and that would not require an API
> change in the future to support it.

Yes this is what I tried to say. So I won't argue.

Oleg.
Re: [PATCH v11 2/2] ptrace,syscall_user_dispatch: checkpoint/restore support for SUD
Posted by kernel test robot 2 years, 6 months ago
Hi Gregory,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on shuah-kselftest/fixes]
[also build test WARNING on linus/master tip/core/entry v6.2 next-20230221]
[cannot apply to shuah-kselftest/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Gregory-Price/syscall_user_dispatch-helper-function-to-operate-on-given-task/20230222-041959
base:   https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git fixes
patch link:    https://lore.kernel.org/r/20230221201740.2236-3-gregory.price%40memverge.com
patch subject: [PATCH v11 2/2] ptrace,syscall_user_dispatch: checkpoint/restore support for SUD
config: i386-tinyconfig (https://download.01.org/0day-ci/archive/20230222/202302220654.bRPCWovm-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/966fb8d2744f50ac8174fe3c5d942112c13c0962
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Gregory-Price/syscall_user_dispatch-helper-function-to-operate-on-given-task/20230222-041959
        git checkout 966fb8d2744f50ac8174fe3c5d942112c13c0962
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=i386 olddefconfig
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash kernel/entry/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302220654.bRPCWovm-lkp@intel.com/

All warnings (new ones prefixed by >>):

   kernel/entry/syscall_user_dispatch.c: In function 'syscall_user_dispatch_get_config':
>> kernel/entry/syscall_user_dispatch.c:133:27: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     133 |         config.selector = (__u64)sd->selector;
         |                           ^
   kernel/entry/syscall_user_dispatch.c: In function 'syscall_user_dispatch_set_config':
>> kernel/entry/syscall_user_dispatch.c:153:54: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
     153 |                                             cfg.len, (void __user*)cfg.selector);
         |                                                      ^


vim +133 kernel/entry/syscall_user_dispatch.c

   117	
   118	int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
   119			                     void __user *data)
   120	{
   121		struct syscall_user_dispatch *sd = &task->syscall_dispatch;
   122		struct ptrace_sud_config config;
   123		if (size != sizeof(struct ptrace_sud_config))
   124			return -EINVAL;
   125	
   126		if (test_task_syscall_work(task, SYSCALL_USER_DISPATCH))
   127			config.mode = PR_SYS_DISPATCH_ON;
   128		else
   129			config.mode = PR_SYS_DISPATCH_OFF;
   130	
   131		config.offset = sd->offset;
   132		config.len = sd->len;
 > 133		config.selector = (__u64)sd->selector;
   134	
   135		if (copy_to_user(data, &config, sizeof(config))) {
   136			return -EFAULT;
   137		}
   138		return 0;
   139	}
   140	
   141	int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long size,
   142			                     void __user *data)
   143	{
   144		int rc;
   145		struct ptrace_sud_config cfg;
   146		if (size != sizeof(struct ptrace_sud_config))
   147			return -EINVAL;
   148	
   149		if (copy_from_user(&cfg, data, sizeof(cfg))) {
   150			return -EFAULT;
   151		}
   152		rc = task_set_syscall_user_dispatch(task, cfg.mode, cfg.offset,
 > 153						    cfg.len, (void __user*)cfg.selector);

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests