From: Hou Tao <houtao1@huawei.com>
Using bpf_probe_read_kernel{_str}() or bpf_probe_read{_str}() to read
from vsyscall page under x86-64 will trigger oops, so add one test case
to ensure that the problem is fixed.
Beside those four bpf helpers mentioned above, testing the read of
vsyscall page by using bpf_probe_read_user{_str} and
bpf_copy_from_user{_task}() as well.
vsyscall page could be disabled by CONFIG_LEGACY_VSYSCALL_NONE or
vsyscall=none boot cmd-line, but it doesn't affect the reproduce of the
problem and the returned error codes.
Signed-off-by: Hou Tao <houtao1@huawei.com>
---
.../selftests/bpf/prog_tests/read_vsyscall.c | 61 +++++++++++++++++++
.../selftests/bpf/progs/read_vsyscall.c | 45 ++++++++++++++
2 files changed, 106 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
create mode 100644 tools/testing/selftests/bpf/progs/read_vsyscall.c
diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
new file mode 100644
index 0000000000000..d9247cc89cf3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include "test_progs.h"
+#include "read_vsyscall.skel.h"
+
+#if defined(__x86_64__)
+/* For VSYSCALL_ADDR */
+#include <asm/vsyscall.h>
+#else
+/* To prevent build failure on non-x86 arch */
+#define VSYSCALL_ADDR 0UL
+#endif
+
+struct read_ret_desc {
+ const char *name;
+ int ret;
+} all_read[] = {
+ { .name = "probe_read_kernel", .ret = -ERANGE },
+ { .name = "probe_read_kernel_str", .ret = -ERANGE },
+ { .name = "probe_read", .ret = -ERANGE },
+ { .name = "probe_read_str", .ret = -ERANGE },
+ /* __access_ok() will fail */
+ { .name = "probe_read_user", .ret = -EFAULT },
+ /* __access_ok() will fail */
+ { .name = "probe_read_user_str", .ret = -EFAULT },
+ /* access_ok() will fail */
+ { .name = "copy_from_user", .ret = -EFAULT },
+ /* both vma_lookup() and expand_stack() will fail */
+ { .name = "copy_from_user_task", .ret = -EFAULT },
+};
+
+void test_read_vsyscall(void)
+{
+ struct read_vsyscall *skel;
+ unsigned int i;
+ int err;
+
+#if !defined(__x86_64__)
+ test__skip();
+ return;
+#endif
+ skel = read_vsyscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load"))
+ return;
+
+ skel->bss->target_pid = getpid();
+ err = read_vsyscall__attach(skel);
+ if (!ASSERT_EQ(err, 0, "read_vsyscall attach"))
+ goto out;
+
+ /* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE,
+ * but it doesn't affect the returned error codes.
+ */
+ skel->bss->user_ptr = (void *)VSYSCALL_ADDR;
+ usleep(1);
+
+ for (i = 0; i < ARRAY_SIZE(all_read); i++)
+ ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name);
+out:
+ read_vsyscall__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c
new file mode 100644
index 0000000000000..986f96687ae15
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+int target_pid = 0;
+void *user_ptr = 0;
+int read_ret[8];
+
+char _license[] SEC("license") = "GPL";
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int do_probe_read(void *ctx)
+{
+ char buf[8];
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ read_ret[0] = bpf_probe_read_kernel(buf, sizeof(buf), user_ptr);
+ read_ret[1] = bpf_probe_read_kernel_str(buf, sizeof(buf), user_ptr);
+ read_ret[2] = bpf_probe_read(buf, sizeof(buf), user_ptr);
+ read_ret[3] = bpf_probe_read_str(buf, sizeof(buf), user_ptr);
+ read_ret[4] = bpf_probe_read_user(buf, sizeof(buf), user_ptr);
+ read_ret[5] = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr);
+
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int do_copy_from_user(void *ctx)
+{
+ char buf[8];
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ read_ret[6] = bpf_copy_from_user(buf, sizeof(buf), user_ptr);
+ read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
+ bpf_get_current_task_btf(), 0);
+
+ return 0;
+}
--
2.29.2
On 1/18/2024 11:30 PM, Hou Tao wrote: > vsyscall page could be disabled by CONFIG_LEGACY_VSYSCALL_NONE or > vsyscall=none boot cmd-line, but it doesn't affect the reproduce of the > problem and the returned error codes. > With vsyscall=emulate a direct read of the vsyscall address from userspace is expected to go through. This is mode deprecated so maybe it wouldn't matter much. Without the fix in patch 2/3, do you see the same behavior with vsyscall=emulate set in the cmdline? Sohil
On 1/23/2024 8:25 AM, Sohil Mehta wrote: > On 1/18/2024 11:30 PM, Hou Tao wrote: > >> vsyscall page could be disabled by CONFIG_LEGACY_VSYSCALL_NONE or >> vsyscall=none boot cmd-line, but it doesn't affect the reproduce of the >> problem and the returned error codes. >> > With vsyscall=emulate a direct read of the vsyscall address from > userspace is expected to go through. This is mode deprecated so maybe it > wouldn't matter much. Without the fix in patch 2/3, do you see the same > behavior with vsyscall=emulate set in the cmdline? Er, I think it depends on whether or not SMAP [1] feature is available. When SMAP feature is enabled, even the vsyscall page is populated, reading the vsyscall page through bpf_read_kernel() will trigger a page fault and then oops. But when there is not SMAP, bpf_read_kernel() will succeed. So I think the test may need to be skipped if vsyscall_mode is emulate. [1]: https://en.wikipedia.org/wiki/Supervisor_Mode_Access_Prevention > > Sohil
On 1/18/24 11:30 PM, Hou Tao wrote:
> From: Hou Tao <houtao1@huawei.com>
>
> Using bpf_probe_read_kernel{_str}() or bpf_probe_read{_str}() to read
> from vsyscall page under x86-64 will trigger oops, so add one test case
> to ensure that the problem is fixed.
>
> Beside those four bpf helpers mentioned above, testing the read of
> vsyscall page by using bpf_probe_read_user{_str} and
> bpf_copy_from_user{_task}() as well.
>
> vsyscall page could be disabled by CONFIG_LEGACY_VSYSCALL_NONE or
> vsyscall=none boot cmd-line, but it doesn't affect the reproduce of the
> problem and the returned error codes.
>
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
> .../selftests/bpf/prog_tests/read_vsyscall.c | 61 +++++++++++++++++++
> .../selftests/bpf/progs/read_vsyscall.c | 45 ++++++++++++++
> 2 files changed, 106 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
> create mode 100644 tools/testing/selftests/bpf/progs/read_vsyscall.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
> new file mode 100644
> index 0000000000000..d9247cc89cf3e
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
> @@ -0,0 +1,61 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
> +#include "test_progs.h"
> +#include "read_vsyscall.skel.h"
> +
> +#if defined(__x86_64__)
> +/* For VSYSCALL_ADDR */
> +#include <asm/vsyscall.h>
> +#else
> +/* To prevent build failure on non-x86 arch */
> +#define VSYSCALL_ADDR 0UL
> +#endif
> +
> +struct read_ret_desc {
> + const char *name;
> + int ret;
> +} all_read[] = {
> + { .name = "probe_read_kernel", .ret = -ERANGE },
> + { .name = "probe_read_kernel_str", .ret = -ERANGE },
> + { .name = "probe_read", .ret = -ERANGE },
> + { .name = "probe_read_str", .ret = -ERANGE },
> + /* __access_ok() will fail */
> + { .name = "probe_read_user", .ret = -EFAULT },
> + /* __access_ok() will fail */
> + { .name = "probe_read_user_str", .ret = -EFAULT },
> + /* access_ok() will fail */
> + { .name = "copy_from_user", .ret = -EFAULT },
> + /* both vma_lookup() and expand_stack() will fail */
> + { .name = "copy_from_user_task", .ret = -EFAULT },
The above comments are not clear enough. For example,
'__access_ok() will fail', user will need to
check the source code where __access_ok() is and
this could be hard e.g., for probe_read_user_str().
Another example, 'both vma_lookup() and expand_stack() will fail',
where is vma_lookup()/expand_stack()? User needs to further
check to make sense.
I suggest remove the above comments and add more
detailed explanation in commit messages with callstack
indicating where the fail/error return happens.
> +};
> +
> +void test_read_vsyscall(void)
> +{
> + struct read_vsyscall *skel;
> + unsigned int i;
> + int err;
> +
> +#if !defined(__x86_64__)
> + test__skip();
> + return;
> +#endif
> + skel = read_vsyscall__open_and_load();
> + if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load"))
> + return;
> +
> + skel->bss->target_pid = getpid();
> + err = read_vsyscall__attach(skel);
> + if (!ASSERT_EQ(err, 0, "read_vsyscall attach"))
> + goto out;
> +
> + /* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE,
> + * but it doesn't affect the returned error codes.
> + */
> + skel->bss->user_ptr = (void *)VSYSCALL_ADDR;
> + usleep(1);
> +
> + for (i = 0; i < ARRAY_SIZE(all_read); i++)
> + ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name);
> +out:
> + read_vsyscall__destroy(skel);
> +}
[...]
Hi,
On 1/22/2024 2:30 PM, Yonghong Song wrote:
>
> On 1/18/24 11:30 PM, Hou Tao wrote:
>> From: Hou Tao <houtao1@huawei.com>
>>
>> Using bpf_probe_read_kernel{_str}() or bpf_probe_read{_str}() to read
>> from vsyscall page under x86-64 will trigger oops, so add one test case
>> to ensure that the problem is fixed.
>>
>> Beside those four bpf helpers mentioned above, testing the read of
>> vsyscall page by using bpf_probe_read_user{_str} and
>> bpf_copy_from_user{_task}() as well.
>>
>> vsyscall page could be disabled by CONFIG_LEGACY_VSYSCALL_NONE or
>> vsyscall=none boot cmd-line, but it doesn't affect the reproduce of the
>> problem and the returned error codes.
>>
>> Signed-off-by: Hou Tao <houtao1@huawei.com>
>> ---
>> .../selftests/bpf/prog_tests/read_vsyscall.c | 61 +++++++++++++++++++
>> .../selftests/bpf/progs/read_vsyscall.c | 45 ++++++++++++++
>> 2 files changed, 106 insertions(+)
>> create mode 100644
>> tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
>> create mode 100644 tools/testing/selftests/bpf/progs/read_vsyscall.c
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
>> b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
>> new file mode 100644
>> index 0000000000000..d9247cc89cf3e
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
>> @@ -0,0 +1,61 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
>> +#include "test_progs.h"
>> +#include "read_vsyscall.skel.h"
>> +
>> +#if defined(__x86_64__)
>> +/* For VSYSCALL_ADDR */
>> +#include <asm/vsyscall.h>
>> +#else
>> +/* To prevent build failure on non-x86 arch */
>> +#define VSYSCALL_ADDR 0UL
>> +#endif
>> +
>> +struct read_ret_desc {
>> + const char *name;
>> + int ret;
>> +} all_read[] = {
>> + { .name = "probe_read_kernel", .ret = -ERANGE },
>> + { .name = "probe_read_kernel_str", .ret = -ERANGE },
>> + { .name = "probe_read", .ret = -ERANGE },
>> + { .name = "probe_read_str", .ret = -ERANGE },
>> + /* __access_ok() will fail */
>> + { .name = "probe_read_user", .ret = -EFAULT },
>> + /* __access_ok() will fail */
>> + { .name = "probe_read_user_str", .ret = -EFAULT },
>> + /* access_ok() will fail */
>> + { .name = "copy_from_user", .ret = -EFAULT },
>> + /* both vma_lookup() and expand_stack() will fail */
>> + { .name = "copy_from_user_task", .ret = -EFAULT },
>
> The above comments are not clear enough. For example,
> '__access_ok() will fail', user will need to
> check the source code where __access_ok() is and
> this could be hard e.g., for probe_read_user_str().
> Another example, 'both vma_lookup() and expand_stack() will fail',
> where is vma_lookup()/expand_stack()? User needs to further
> check to make sense.
Yes. These comment are highly coupled with the implementation.
>
> I suggest remove the above comments and add more
> detailed explanation in commit messages with callstack
> indicating where the fail/error return happens.
Will do in v2. Thanks for the suggestions.
>
>> +};
>> +
>> +void test_read_vsyscall(void)
>> +{
>> + struct read_vsyscall *skel;
>> + unsigned int i;
>> + int err;
>> +
>> +#if !defined(__x86_64__)
>> + test__skip();
>> + return;
>> +#endif
>> + skel = read_vsyscall__open_and_load();
>> + if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load"))
>> + return;
>> +
>> + skel->bss->target_pid = getpid();
>> + err = read_vsyscall__attach(skel);
>> + if (!ASSERT_EQ(err, 0, "read_vsyscall attach"))
>> + goto out;
>> +
>> + /* userspace may don't have vsyscall page due to
>> LEGACY_VSYSCALL_NONE,
>> + * but it doesn't affect the returned error codes.
>> + */
>> + skel->bss->user_ptr = (void *)VSYSCALL_ADDR;
>> + usleep(1);
>> +
>> + for (i = 0; i < ARRAY_SIZE(all_read); i++)
>> + ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret,
>> all_read[i].name);
>> +out:
>> + read_vsyscall__destroy(skel);
>> +}
> [...]
© 2016 - 2025 Red Hat, Inc.