When using GCC on x86-64 to compile an usdt prog with -O1 or higher
optimization, the compiler will generate SIB addressing mode for global
array and PC-relative addressing mode for global variable,
e.g. "1@-96(%rbp,%rax,8)" and "-1@4+t1(%rip)".
In this patch:
- enrich subtest_basic_usdt test case to cover SIB addressing usdt argument spec
handling logic
Signed-off-by: Jiawei Zhao <phoenix500526@163.com>
---
tools/testing/selftests/bpf/prog_tests/usdt.c | 44 ++++++++++++++++++-
tools/testing/selftests/bpf/progs/test_usdt.c | 30 +++++++++++++
2 files changed, 72 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 9057e983cc54..c04b416aa4a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -25,6 +25,10 @@ unsigned short test_usdt0_semaphore SEC(".probes");
unsigned short test_usdt3_semaphore SEC(".probes");
unsigned short test_usdt12_semaphore SEC(".probes");
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
+unsigned short test_usdt_sib_semaphore SEC(".probes");
+#endif
+
static void __always_inline trigger_func(int x) {
long y = 42;
@@ -40,12 +44,29 @@ static void __always_inline trigger_func(int x) {
}
}
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
+static __attribute__((optimize("O1"))) void trigger_sib_spec(void)
+{
+ /* Base address + offset + (index * scale) */
+ /* Force SIB addressing with inline assembly */
+ asm volatile(
+ "# probe point with memory access\n"
+ STAP_PROBE_ASM(test, usdt_sib, -2@(%%rdx,%%rax,2))
+ "# end probe point"
+ :
+ : "d"(nums), "a"(0)
+ : "memory"
+ );
+}
+#endif
+
static void subtest_basic_usdt(void)
{
LIBBPF_OPTS(bpf_usdt_opts, opts);
struct test_usdt *skel;
struct test_usdt__bss *bss;
int err, i;
+ const __u64 expected_cookie = 0xcafedeadbeeffeed;
skel = test_usdt__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open"))
@@ -59,20 +80,29 @@ static void subtest_basic_usdt(void)
goto cleanup;
/* usdt0 won't be auto-attached */
- opts.usdt_cookie = 0xcafedeadbeeffeed;
+ opts.usdt_cookie = expected_cookie;
skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
0 /*self*/, "/proc/self/exe",
"test", "usdt0", &opts);
if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
goto cleanup;
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
+ opts.usdt_cookie = expected_cookie;
+ skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib,
+ 0 /*self*/, "/proc/self/exe",
+ "test", "usdt_sib", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link"))
+ goto cleanup;
+#endif
+
trigger_func(1);
ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called");
ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called");
ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");
- ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
+ ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie");
ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");
@@ -156,6 +186,16 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
+ trigger_sib_spec();
+ ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called");
+ ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie");
+ ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt");
+ ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg");
+ ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret");
+ ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size");
+#endif
+
cleanup:
test_usdt__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
index 096488f47fbc..b5f883bca66b 100644
--- a/tools/testing/selftests/bpf/progs/test_usdt.c
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -107,4 +107,34 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
return 0;
}
+
+int usdt_sib_called;
+u64 usdt_sib_cookie;
+int usdt_sib_arg_cnt;
+int usdt_sib_arg_ret;
+u64 usdt_sib_arg;
+int usdt_sib_arg_size;
+
+// Note: usdt_sib is only tested on x86-related architectures, so it requires
+// manual attach since auto-attach will panic tests under other architectures
+SEC("usdt")
+int usdt_sib(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt_sib_called, 1);
+
+ usdt_sib_cookie = bpf_usdt_cookie(ctx);
+ usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
+ usdt_sib_arg = (short)tmp;
+ usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
--
2.43.0
On Fri, Aug 22, 2025 at 8:16 AM Jiawei Zhao <phoenix500526@163.com> wrote:
>
> When using GCC on x86-64 to compile an usdt prog with -O1 or higher
> optimization, the compiler will generate SIB addressing mode for global
> array and PC-relative addressing mode for global variable,
> e.g. "1@-96(%rbp,%rax,8)" and "-1@4+t1(%rip)".
>
> In this patch:
> - enrich subtest_basic_usdt test case to cover SIB addressing usdt argument spec
> handling logic
>
> Signed-off-by: Jiawei Zhao <phoenix500526@163.com>
> ---
> tools/testing/selftests/bpf/prog_tests/usdt.c | 44 ++++++++++++++++++-
> tools/testing/selftests/bpf/progs/test_usdt.c | 30 +++++++++++++
> 2 files changed, 72 insertions(+), 2 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
> index 9057e983cc54..c04b416aa4a8 100644
> --- a/tools/testing/selftests/bpf/prog_tests/usdt.c
> +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
> @@ -25,6 +25,10 @@ unsigned short test_usdt0_semaphore SEC(".probes");
> unsigned short test_usdt3_semaphore SEC(".probes");
> unsigned short test_usdt12_semaphore SEC(".probes");
>
> +#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
does clang define __GNUC__ as well? otherwise why !define(__clang__) ?
> +unsigned short test_usdt_sib_semaphore SEC(".probes");
> +#endif
> +
> static void __always_inline trigger_func(int x) {
> long y = 42;
>
> @@ -40,12 +44,29 @@ static void __always_inline trigger_func(int x) {
> }
> }
>
> +#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
> +static __attribute__((optimize("O1"))) void trigger_sib_spec(void)
you use assembly directly, so optimize() should be irrelevant, no?
So we can make this non-GCC specific, right?
> +{
> + /* Base address + offset + (index * scale) */
> + /* Force SIB addressing with inline assembly */
> + asm volatile(
> + "# probe point with memory access\n"
> + STAP_PROBE_ASM(test, usdt_sib, -2@(%%rdx,%%rax,2))
is it guaranteed that nums address will end up in rdx and a in rax?...
I'd feel more comfortable if you explicitly set up rdx and rax in
assembly, then add USDT with STAP_PROBE_ASM. That should be possible
with embedded assembly, no?
> + "# end probe point"
why these unnecessary comments embedded in the assembly?...
> + :
> + : "d"(nums), "a"(0)
> + : "memory"
> + );
> +}
[...]
> +
> +int usdt_sib_called;
> +u64 usdt_sib_cookie;
> +int usdt_sib_arg_cnt;
> +int usdt_sib_arg_ret;
> +u64 usdt_sib_arg;
> +int usdt_sib_arg_size;
> +
> +// Note: usdt_sib is only tested on x86-related architectures, so it requires
> +// manual attach since auto-attach will panic tests under other architectures
don't use c++ style comments
> +SEC("usdt")
> +int usdt_sib(struct pt_regs *ctx)
> +{
> + long tmp;
> +
> + if (my_pid != (bpf_get_current_pid_tgid() >> 32))
> + return 0;
> +
> + __sync_fetch_and_add(&usdt_sib_called, 1);
> +
> + usdt_sib_cookie = bpf_usdt_cookie(ctx);
> + usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
> +
> + usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
> + usdt_sib_arg = (short)tmp;
> + usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
> +
> + return 0;
> +}
> +
> char _license[] SEC("license") = "GPL";
> --
> 2.43.0
>
>> + STAP_PROBE_ASM(test, usdt_sib, -2@(%%rdx,%%rax,2))
I think such hard-coding, for example -2@(%%rdx,%%rax,2), is not very maintainable.
Essentially, STAP_PROBE_ASM just performs simple text substitution. But in SIB addressing,
both scale and size depend on the actual parameter type.
For instance, in -2@(%%rdx,%%rax,2), the -2 and 2 come from the fact that nums is an
array of type short: the size of a short is 2, so the scale is 2; and since short is a signed type,
the size is -2.
STAP_PROBE_ASM is expanded at the preprocessing stage, while at that stage we cannot
compute the actual size of nums. If the type of nums changes in the future, it could lead to errors.
At 2025-08-23 06:59:49, "Andrii Nakryiko" <andrii.nakryiko@gmail.com> wrote:
>On Fri, Aug 22, 2025 at 8:16 AM Jiawei Zhao <phoenix500526@163.com> wrote:
>>
>> When using GCC on x86-64 to compile an usdt prog with -O1 or higher
>> optimization, the compiler will generate SIB addressing mode for global
>> array and PC-relative addressing mode for global variable,
>> e.g. "1@-96(%rbp,%rax,8)" and "-1@4+t1(%rip)".
>>
>> In this patch:
>> - enrich subtest_basic_usdt test case to cover SIB addressing usdt argument spec
>> handling logic
>>
>> Signed-off-by: Jiawei Zhao <phoenix500526@163.com>
>> ---
>> tools/testing/selftests/bpf/prog_tests/usdt.c | 44 ++++++++++++++++++-
>> tools/testing/selftests/bpf/progs/test_usdt.c | 30 +++++++++++++
>> 2 files changed, 72 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
>> index 9057e983cc54..c04b416aa4a8 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/usdt.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
>> @@ -25,6 +25,10 @@ unsigned short test_usdt0_semaphore SEC(".probes");
>> unsigned short test_usdt3_semaphore SEC(".probes");
>> unsigned short test_usdt12_semaphore SEC(".probes");
>>
>> +#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
>
>does clang define __GNUC__ as well? otherwise why !define(__clang__) ?
>
>> +unsigned short test_usdt_sib_semaphore SEC(".probes");
>> +#endif
>> +
>> static void __always_inline trigger_func(int x) {
>> long y = 42;
>>
>> @@ -40,12 +44,29 @@ static void __always_inline trigger_func(int x) {
>> }
>> }
>>
>> +#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
>> +static __attribute__((optimize("O1"))) void trigger_sib_spec(void)
>
>you use assembly directly, so optimize() should be irrelevant, no?
>
>So we can make this non-GCC specific, right?
>
>> +{
>> + /* Base address + offset + (index * scale) */
>> + /* Force SIB addressing with inline assembly */
>> + asm volatile(
>> + "# probe point with memory access\n"
>> + STAP_PROBE_ASM(test, usdt_sib, -2@(%%rdx,%%rax,2))
>
>is it guaranteed that nums address will end up in rdx and a in rax?...
>
>I'd feel more comfortable if you explicitly set up rdx and rax in
>assembly, then add USDT with STAP_PROBE_ASM. That should be possible
>with embedded assembly, no?
>
>> + "# end probe point"
>
>why these unnecessary comments embedded in the assembly?...
>
>> + :
>> + : "d"(nums), "a"(0)
>> + : "memory"
>> + );
>> +}
>
>[...]
>
>> +
>> +int usdt_sib_called;
>> +u64 usdt_sib_cookie;
>> +int usdt_sib_arg_cnt;
>> +int usdt_sib_arg_ret;
>> +u64 usdt_sib_arg;
>> +int usdt_sib_arg_size;
>> +
>> +// Note: usdt_sib is only tested on x86-related architectures, so it requires
>> +// manual attach since auto-attach will panic tests under other architectures
>
>don't use c++ style comments
>
>> +SEC("usdt")
>> +int usdt_sib(struct pt_regs *ctx)
>> +{
>> + long tmp;
>> +
>> + if (my_pid != (bpf_get_current_pid_tgid() >> 32))
>> + return 0;
>> +
>> + __sync_fetch_and_add(&usdt_sib_called, 1);
>> +
>> + usdt_sib_cookie = bpf_usdt_cookie(ctx);
>> + usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
>> +
>> + usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
>> + usdt_sib_arg = (short)tmp;
>> + usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
>> +
>> + return 0;
>> +}
>> +
>> char _license[] SEC("license") = "GPL";
>> --
>> 2.43.0
>>
>does clang define __GNUC__ as well? otherwise why !define(__clang__) ?
Yes, clang does defind __GNUC__ .
>you use assembly directly, so optimize() should be irrelevant, no?
>
>So we can make this non-GCC specific, right?
Yes, I'll make it in the next version.
>is it guaranteed that nums address will end up in rdx and a in rax?...
>
>I'd feel more comfortable if you explicitly set up rdx and rax in
>assembly, then add USDT with STAP_PROBE_ASM. That should be possible
>with embedded assembly, no?
I think it will in that the input operand constrain `"d"(nums), "a"(0)` implies
the nums address will end up in rdx while the index 0 will be in rax.
So I think we don't need to explicitly set up rdx and rax again.
>why these unnecessary comments embedded in the assembly?...
Because there is "before" and "after" in the `STAP_PROBE_ASM` example.
It's OK to remove it.
At 2025-08-23 06:59:49, "Andrii Nakryiko" <andrii.nakryiko@gmail.com> wrote:
>On Fri, Aug 22, 2025 at 8:16 AM Jiawei Zhao <phoenix500526@163.com> wrote:
>>
>> When using GCC on x86-64 to compile an usdt prog with -O1 or higher
>> optimization, the compiler will generate SIB addressing mode for global
>> array and PC-relative addressing mode for global variable,
>> e.g. "1@-96(%rbp,%rax,8)" and "-1@4+t1(%rip)".
>>
>> In this patch:
>> - enrich subtest_basic_usdt test case to cover SIB addressing usdt argument spec
>> handling logic
>>
>> Signed-off-by: Jiawei Zhao <phoenix500526@163.com>
>> ---
>> tools/testing/selftests/bpf/prog_tests/usdt.c | 44 ++++++++++++++++++-
>> tools/testing/selftests/bpf/progs/test_usdt.c | 30 +++++++++++++
>> 2 files changed, 72 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
>> index 9057e983cc54..c04b416aa4a8 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/usdt.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
>> @@ -25,6 +25,10 @@ unsigned short test_usdt0_semaphore SEC(".probes");
>> unsigned short test_usdt3_semaphore SEC(".probes");
>> unsigned short test_usdt12_semaphore SEC(".probes");
>>
>> +#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
>
>does clang define __GNUC__ as well? otherwise why !define(__clang__) ?
>
>> +unsigned short test_usdt_sib_semaphore SEC(".probes");
>> +#endif
>> +
>> static void __always_inline trigger_func(int x) {
>> long y = 42;
>>
>> @@ -40,12 +44,29 @@ static void __always_inline trigger_func(int x) {
>> }
>> }
>>
>> +#if ((defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && !defined(__clang__))
>> +static __attribute__((optimize("O1"))) void trigger_sib_spec(void)
>
>you use assembly directly, so optimize() should be irrelevant, no?
>
>So we can make this non-GCC specific, right?
>
>> +{
>> + /* Base address + offset + (index * scale) */
>> + /* Force SIB addressing with inline assembly */
>> + asm volatile(
>> + "# probe point with memory access\n"
>> + STAP_PROBE_ASM(test, usdt_sib, -2@(%%rdx,%%rax,2))
>
>is it guaranteed that nums address will end up in rdx and a in rax?...
>
>I'd feel more comfortable if you explicitly set up rdx and rax in
>assembly, then add USDT with STAP_PROBE_ASM. That should be possible
>with embedded assembly, no?
>
>> + "# end probe point"
>
>why these unnecessary comments embedded in the assembly?...
>
>> + :
>> + : "d"(nums), "a"(0)
>> + : "memory"
>> + );
>> +}
>
>[...]
>
>> +
>> +int usdt_sib_called;
>> +u64 usdt_sib_cookie;
>> +int usdt_sib_arg_cnt;
>> +int usdt_sib_arg_ret;
>> +u64 usdt_sib_arg;
>> +int usdt_sib_arg_size;
>> +
>> +// Note: usdt_sib is only tested on x86-related architectures, so it requires
>> +// manual attach since auto-attach will panic tests under other architectures
>
>don't use c++ style comments
>
>> +SEC("usdt")
>> +int usdt_sib(struct pt_regs *ctx)
>> +{
>> + long tmp;
>> +
>> + if (my_pid != (bpf_get_current_pid_tgid() >> 32))
>> + return 0;
>> +
>> + __sync_fetch_and_add(&usdt_sib_called, 1);
>> +
>> + usdt_sib_cookie = bpf_usdt_cookie(ctx);
>> + usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
>> +
>> + usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
>> + usdt_sib_arg = (short)tmp;
>> + usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
>> +
>> + return 0;
>> +}
>> +
>> char _license[] SEC("license") = "GPL";
>> --
>> 2.43.0
>>
© 2016 - 2025 Red Hat, Inc.