Add support for detecting the nop,nop5 instruction combo at a usdt probe
site by checking whether the probe's nop is followed by a nop5 instruction.
When the nop,nop5 combo is detected and the kernel supports the uprobe
syscall, we can place the probe on top of the nop5 and get it optimized.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/lib/bpf/usdt.c | 47 ++++++++++++++++++++++++++++++++++++++++----
1 file changed, 43 insertions(+), 4 deletions(-)
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index d1524f6f54ae..c81e0a58caf5 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -262,6 +262,7 @@ struct usdt_manager {
bool has_bpf_cookie;
bool has_sema_refcnt;
bool has_uprobe_multi;
+ bool has_uprobe_syscall;
};
struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
@@ -301,6 +302,13 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
* usdt probes.
*/
man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK);
+
+ /*
+ * Detect kernel support for uprobe() syscall, its presence means we can
+ * take advantage of faster nop5 uprobe handling.
+ * Added in: 56101b69c919 ("uprobes/x86: Add uprobe syscall to speed up uprobe")
+ */
+ man->has_uprobe_syscall = kernel_supports(obj, FEAT_UPROBE_SYSCALL);
return man;
}
@@ -585,13 +593,34 @@ static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off,
static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
-static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid,
- const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie,
- struct usdt_target **out_targets, size_t *out_target_cnt)
+#if defined(__x86_64__)
+static bool has_nop_combo(int fd, long off)
+{
+ static unsigned char nop_combo[6] = {
+ 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 /* nop,nop5 */
+ };
+ unsigned char buf[6] = {};
+
+ if (pread(fd, buf, 6, off) != 6)
+ return false;
+ return memcmp(buf, nop_combo, 6) == 0;
+}
+#else
+static bool has_nop_combo(int fd, long off)
+{
+ return false;
+}
+#endif
+
+static int collect_usdt_targets(struct usdt_manager *man, struct elf_fd *elf_fd, const char *path,
+ pid_t pid, const char *usdt_provider, const char *usdt_name,
+ __u64 usdt_cookie, struct usdt_target **out_targets,
+ size_t *out_target_cnt)
{
size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0;
struct elf_seg *segs = NULL, *vma_segs = NULL;
struct usdt_target *targets = NULL, *target;
+ Elf *elf = elf_fd->elf;
long base_addr = 0;
Elf_Scn *notes_scn, *base_scn;
GElf_Shdr base_shdr, notes_shdr;
@@ -784,6 +813,16 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
target = &targets[target_cnt];
memset(target, 0, sizeof(*target));
+ /*
+ * We have uprobe syscall and usdt with nop,nop5 instructions combo,
+ * so we can place the uprobe directly on nop5 (+1) and get this probe
+ * optimized.
+ */
+ if (man->has_uprobe_syscall && has_nop_combo(elf_fd->fd, usdt_rel_ip)) {
+ usdt_abs_ip++;
+ usdt_rel_ip++;
+ }
+
target->abs_ip = usdt_abs_ip;
target->rel_ip = usdt_rel_ip;
target->sema_off = usdt_sema_off;
@@ -998,7 +1037,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct
/* discover USDT in given binary, optionally limiting
* activations to a given PID, if pid > 0
*/
- err = collect_usdt_targets(man, elf_fd.elf, path, pid, usdt_provider, usdt_name,
+ err = collect_usdt_targets(man, &elf_fd, path, pid, usdt_provider, usdt_name,
usdt_cookie, &targets, &target_cnt);
if (err <= 0) {
err = (err == 0) ? -ENOENT : err;
--
2.53.0
> diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
> index d1524f6f54ae..c81e0a58caf5 100644
> --- a/tools/lib/bpf/usdt.c
> +++ b/tools/lib/bpf/usdt.c
[ ... ]
> +#if defined(__x86_64__)
> +static bool has_nop_combo(int fd, long off)
> +{
> + static unsigned char nop_combo[6] = {
^^^^^^^^^^^^^^^^^^^^^^
Should nop_combo be declared const? It is a read-only lookup table
used only in the memcmp() below.
> + 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 /* nop,nop5 */
> + };
> + unsigned char buf[6] = {};
> +
> + if (pread(fd, buf, 6, off) != 6)
> + return false;
> + return memcmp(buf, nop_combo, 6) == 0;
> +}
[ ... ]
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/22221513473
AI-authorship-score: low
AI-authorship-explanation: Domain-specific optimization from established BPF/tracing maintainer with iterative lore review history and deep x86_64 uprobe internals knowledge.
issues-found: 1
issue-severity-score: low
issue-severity-explanation: Missing const qualifier on a static read-only byte array is a minor C best practice deviation with no runtime impact.
On Fri, 20 Feb 2026 11:24:45 +0000 (UTC)
bot+bpf-ci@kernel.org wrote:
> > diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
> > index d1524f6f54ae..c81e0a58caf5 100644
> > --- a/tools/lib/bpf/usdt.c
> > +++ b/tools/lib/bpf/usdt.c
>
> [ ... ]
>
> > +#if defined(__x86_64__)
> > +static bool has_nop_combo(int fd, long off)
> > +{
> > + static unsigned char nop_combo[6] = {
> ^^^^^^^^^^^^^^^^^^^^^^
>
> Should nop_combo be declared const? It is a read-only lookup table
> used only in the memcmp() below.
Actually you either want 'static const unsigned char' or just 'unsigned char'.
In either case gcc optimises away the array, the call to memcmp(), and
just compares with 32bit and 64bit constants.
>
> > + 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 /* nop,nop5 */
> > + };
> > + unsigned char buf[6] = {};
No need to initialise buf[];
David
> > +
> > + if (pread(fd, buf, 6, off) != 6)
> > + return false;
> > + return memcmp(buf, nop_combo, 6) == 0;
> > +}
>
> [ ... ]
>
>
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
>
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/22221513473
>
> AI-authorship-score: low
> AI-authorship-explanation: Domain-specific optimization from established BPF/tracing maintainer with iterative lore review history and deep x86_64 uprobe internals knowledge.
> issues-found: 1
> issue-severity-score: low
> issue-severity-explanation: Missing const qualifier on a static read-only byte array is a minor C best practice deviation with no runtime impact.
On Sat, Feb 21, 2026 at 12:00:35PM +0000, David Laight wrote:
> On Fri, 20 Feb 2026 11:24:45 +0000 (UTC)
> bot+bpf-ci@kernel.org wrote:
>
> > > diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
> > > index d1524f6f54ae..c81e0a58caf5 100644
> > > --- a/tools/lib/bpf/usdt.c
> > > +++ b/tools/lib/bpf/usdt.c
> >
> > [ ... ]
> >
> > > +#if defined(__x86_64__)
> > > +static bool has_nop_combo(int fd, long off)
> > > +{
> > > + static unsigned char nop_combo[6] = {
> > ^^^^^^^^^^^^^^^^^^^^^^
> >
> > Should nop_combo be declared const? It is a read-only lookup table
> > used only in the memcmp() below.
>
> Actually you either want 'static const unsigned char' or just 'unsigned char'.
> In either case gcc optimises away the array, the call to memcmp(), and
> just compares with 32bit and 64bit constants.
ok, makes sense, will change
>
> >
> > > + 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 /* nop,nop5 */
> > > + };
> > > + unsigned char buf[6] = {};
>
> No need to initialise buf[];
right, thanks,
jirka
>
> David
>
> > > +
> > > + if (pread(fd, buf, 6, off) != 6)
> > > + return false;
> > > + return memcmp(buf, nop_combo, 6) == 0;
> > > +}
> >
> > [ ... ]
> >
> >
> > ---
> > AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> > See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
> >
> > CI run summary: https://github.com/kernel-patches/bpf/actions/runs/22221513473
> >
> > AI-authorship-score: low
> > AI-authorship-explanation: Domain-specific optimization from established BPF/tracing maintainer with iterative lore review history and deep x86_64 uprobe internals knowledge.
> > issues-found: 1
> > issue-severity-score: low
> > issue-severity-explanation: Missing const qualifier on a static read-only byte array is a minor C best practice deviation with no runtime impact.
>
© 2016 - 2026 Red Hat, Inc.