From nobody Mon Apr 6 13:28:26 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0B0ACC07E9D for ; Tue, 27 Sep 2022 16:01:46 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232550AbiI0QBo (ORCPT ); Tue, 27 Sep 2022 12:01:44 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:40944 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232316AbiI0QBf (ORCPT ); Tue, 27 Sep 2022 12:01:35 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 43F3658531 for ; Tue, 27 Sep 2022 09:01:32 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id D16DB61A25 for ; Tue, 27 Sep 2022 16:01:31 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 40348C433D7; Tue, 27 Sep 2022 16:01:31 +0000 (UTC) Received: from rostedt by gandalf.local.home with local (Exim 4.96) (envelope-from ) id 1odD2f-00G2na-26; Tue, 27 Sep 2022 12:02:41 -0400 Message-ID: <20220927160241.258139610@goodmis.org> User-Agent: quilt/0.66 Date: Tue, 27 Sep 2022 12:02:17 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org Cc: Daniel Bristot de Oliveira , Tzvetomir Stoyanov , Ingo Molnar , "Masami Hiramatsu (Google)" Subject: [for-next][PATCH 01/20] tracing/eprobe: Add eprobe filter support References: <20220927160216.349640304@goodmis.org> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Masami Hiramatsu (Google)" Add the filter option to the 
event probe. This is useful if the user wants to derive a new event based on the condition of the original event. E.g. echo 'e:egroup/stat_runtime_4core sched/sched_stat_runtime \ runtime=3D$runtime:u32 if cpu < 4' >> ../dynamic_events Then it can filter the events only on the first 4 cores. Note that the fields used for 'if' must be the fields in the original events, not eprobe events. Link: https://lkml.kernel.org/r/165932114513.2850673.2592206685744598080.st= git@devnote2 Cc: Tzvetomir Stoyanov Cc: Ingo Molnar Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_eprobe.c | 104 +++++++++++++++++++++++++++++++++--- kernel/trace/trace_probe.h | 3 +- 2 files changed, 98 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 1783e3478912..78299d3724a2 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -26,6 +26,9 @@ struct trace_eprobe { /* tracepoint event */ const char *event_name; =20 + /* filter string for the tracepoint */ + char *filter_str; + struct trace_event_call *event; =20 struct dyn_event devent; @@ -664,14 +667,15 @@ static struct event_trigger_data * new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file) { struct event_trigger_data *trigger; + struct event_filter *filter =3D NULL; struct eprobe_data *edata; + int ret; =20 edata =3D kzalloc(sizeof(*edata), GFP_KERNEL); trigger =3D kzalloc(sizeof(*trigger), GFP_KERNEL); if (!trigger || !edata) { - kfree(edata); - kfree(trigger); - return ERR_PTR(-ENOMEM); + ret =3D -ENOMEM; + goto error; } =20 trigger->flags =3D EVENT_TRIGGER_FL_PROBE; @@ -686,13 +690,25 @@ new_eprobe_trigger(struct trace_eprobe *ep, struct tr= ace_event_file *file) trigger->cmd_ops =3D &event_trigger_cmd; =20 INIT_LIST_HEAD(&trigger->list); - RCU_INIT_POINTER(trigger->filter, NULL); + + if (ep->filter_str) { + ret =3D create_event_filter(file->tr, file->event_call, + ep->filter_str, false, 
&filter); + if (ret) + goto error; + } + RCU_INIT_POINTER(trigger->filter, filter); =20 edata->file =3D file; edata->ep =3D ep; trigger->private_data =3D edata; =20 return trigger; +error: + free_event_filter(filter); + kfree(edata); + kfree(trigger); + return ERR_PTR(ret); } =20 static int enable_eprobe(struct trace_eprobe *ep, @@ -726,6 +742,7 @@ static int disable_eprobe(struct trace_eprobe *ep, { struct event_trigger_data *trigger =3D NULL, *iter; struct trace_event_file *file; + struct event_filter *filter; struct eprobe_data *edata; =20 file =3D find_event_file(tr, ep->event_system, ep->event_name); @@ -752,6 +769,10 @@ static int disable_eprobe(struct trace_eprobe *ep, /* Make sure nothing is using the edata or trigger */ tracepoint_synchronize_unregister(); =20 + filter =3D rcu_access_pointer(trigger->filter); + + if (filter) + free_event_filter(filter); kfree(edata); kfree(trigger); =20 @@ -927,12 +948,62 @@ static int trace_eprobe_tp_update_arg(struct trace_ep= robe *ep, const char *argv[ return ret; } =20 +static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, co= nst char *argv[]) +{ + struct event_filter *dummy; + int i, ret, len =3D 0; + char *p; + + if (argc =3D=3D 0) { + trace_probe_log_err(0, NO_EP_FILTER); + return -EINVAL; + } + + /* Recover the filter string */ + for (i =3D 0; i < argc; i++) + len +=3D strlen(argv[i]) + 1; + + ep->filter_str =3D kzalloc(len, GFP_KERNEL); + if (!ep->filter_str) + return -ENOMEM; + + p =3D ep->filter_str; + for (i =3D 0; i < argc; i++) { + ret =3D snprintf(p, len, "%s ", argv[i]); + if (ret < 0) + goto error; + if (ret > len) { + ret =3D -E2BIG; + goto error; + } + p +=3D ret; + len -=3D ret; + } + p[-1] =3D '\0'; + + /* + * Ensure the filter string can be parsed correctly. Note, this + * filter string is for the original event, not for the eprobe. 
+ */ + ret =3D create_event_filter(top_trace_array(), ep->event, ep->filter_str, + true, &dummy); + free_event_filter(dummy); + if (ret) + goto error; + + return 0; +error: + kfree(ep->filter_str); + ep->filter_str =3D NULL; + return ret; +} + static int __trace_eprobe_create(int argc, const char *argv[]) { /* * Argument syntax: - * e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS] - * Fetch args: + * e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS] [if FILTER] + * Fetch args (no space): * =3D$[:TYPE] */ const char *event =3D NULL, *group =3D EPROBE_EVENT_SYSTEM; @@ -942,8 +1013,8 @@ static int __trace_eprobe_create(int argc, const char = *argv[]) char buf1[MAX_EVENT_NAME_LEN]; char buf2[MAX_EVENT_NAME_LEN]; char gbuf[MAX_EVENT_NAME_LEN]; - int ret =3D 0; - int i; + int ret =3D 0, filter_idx =3D 0; + int i, filter_cnt; =20 if (argc < 2 || argv[0][0] !=3D 'e') return -ECANCELED; @@ -973,6 +1044,15 @@ static int __trace_eprobe_create(int argc, const char= *argv[]) event =3D buf1; } =20 + for (i =3D 2; i < argc; i++) { + if (!strcmp(argv[i], "if")) { + filter_idx =3D i + 1; + filter_cnt =3D argc - filter_idx; + argc =3D i; + break; + } + } + mutex_lock(&event_mutex); event_call =3D find_and_get_event(sys_name, sys_event); ep =3D alloc_event_probe(group, event, event_call, argc - 2); @@ -988,6 +1068,14 @@ static int __trace_eprobe_create(int argc, const char= *argv[]) goto error; } =20 + if (filter_idx) { + trace_probe_log_set_index(filter_idx); + ret =3D trace_eprobe_parse_filter(ep, filter_cnt, argv + filter_idx); + if (ret) + goto parse_error; + } else + ep->filter_str =3D NULL; + argc -=3D 2; argv +=3D 2; /* parse arguments */ for (i =3D 0; i < argc && i < MAX_TRACE_ARGS; i++) { diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 3b3869ae8cfd..de38f1c03776 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -445,7 +445,8 @@ extern int traceprobe_define_arg_fields(struct trace_ev= ent_call *event_call, C(SAME_PROBE, "There is 
already the exact same probe event"),\ C(NO_EVENT_INFO, "This requires both group and event name to attach"),\ C(BAD_ATTACH_EVENT, "Attached event does not exist"),\ - C(BAD_ATTACH_ARG, "Attached event does not have this field"), + C(BAD_ATTACH_ARG, "Attached event does not have this field"),\ + C(NO_EP_FILTER, "No filter rule after 'if'"), =20 #undef C #define C(a, b) TP_ERR_##a --=20 2.35.1