[RFC PATCH v2 3/5] mm: Add a tracepoint when OOM victim selection is failed

Chuyi Zhou posted 5 patches 2 years, 6 months ago
[RFC PATCH v2 3/5] mm: Add a tracepoint when OOM victim selection is failed
Posted by Chuyi Zhou 2 years, 6 months ago
This patch adds a tracepoint to mark the scenario where nothing was
chosen by the OOM killer. This allows BPF programs to catch the fact
that the BPF OOM policy didn't work well.

Suggested-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
---
 include/trace/events/oom.h | 18 ++++++++++++++++++
 mm/oom_kill.c              |  1 +
 2 files changed, 19 insertions(+)

diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 26a11e4a2c36..b6ae1134229c 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -6,6 +6,7 @@
 #define _TRACE_OOM_H
 #include <linux/tracepoint.h>
 #include <trace/events/mmflags.h>
+#include <linux/oom.h>
 
 TRACE_EVENT(oom_score_adj_update,
 
@@ -151,6 +152,23 @@ TRACE_EVENT(skip_task_reaping,
 	TP_printk("pid=%d", __entry->pid)
 );
 
+TRACE_EVENT(select_bad_process_end,
+
+	TP_PROTO(struct oom_control *oc),
+
+	TP_ARGS(oc),
+
+	TP_STRUCT__entry(
+		__array(char, policy_name, POLICY_NAME_LEN)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->policy_name, oc->policy_name, POLICY_NAME_LEN);
+	),
+
+	TP_printk("policy_name=%s", __entry->policy_name)
+);
+
 #ifdef CONFIG_COMPACTION
 TRACE_EVENT(compact_retry,
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3239dcdba4d7..af40a1b750fa 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1235,6 +1235,7 @@ bool out_of_memory(struct oom_control *oc)
 	select_bad_process(oc);
 	/* Found nothing?!?! */
 	if (!oc->chosen) {
+		trace_select_bad_process_end(oc);
 		dump_header(oc, NULL);
 		pr_warn("Out of memory and no killable processes...\n");
 		/*
-- 
2.20.1
Re: [RFC PATCH v2 3/5] mm: Add a tracepoint when OOM victim selection is failed
Posted by Alan Maguire 2 years, 5 months ago
On 10/08/2023 09:13, Chuyi Zhou wrote:
> This patch add a tracepoint to mark the scenario where nothing was
> chosen for OOM killer. This would allow BPF programs to catch the fact
> that the BPF OOM policy didn't work well.
> 
> Suggested-by: Alan Maguire <alan.maguire@oracle.com>
> Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
> ---
>  include/trace/events/oom.h | 18 ++++++++++++++++++
>  mm/oom_kill.c              |  1 +
>  2 files changed, 19 insertions(+)
> 
> diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
> index 26a11e4a2c36..b6ae1134229c 100644
> --- a/include/trace/events/oom.h
> +++ b/include/trace/events/oom.h
> @@ -6,6 +6,7 @@
>  #define _TRACE_OOM_H
>  #include <linux/tracepoint.h>
>  #include <trace/events/mmflags.h>
> +#include <linux/oom.h>
>  
>  TRACE_EVENT(oom_score_adj_update,
>  
> @@ -151,6 +152,23 @@ TRACE_EVENT(skip_task_reaping,
>  	TP_printk("pid=%d", __entry->pid)
>  );
>  
> +TRACE_EVENT(select_bad_process_end,
> +

Would oom_select_bad_process_fail be a better name here?
"_end" is fairly neutral, whereas "_fail" indicates that something
unexpected happened.

> +	TP_PROTO(struct oom_control *oc),
> +
> +	TP_ARGS(oc),
> +
> +	TP_STRUCT__entry(
> +		__array(char, policy_name, POLICY_NAME_LEN)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->policy_name, oc->policy_name, POLICY_NAME_LEN);
> +	),
> +
> +	TP_printk("policy_name=%s", __entry->policy_name)
> +);
> +
>  #ifdef CONFIG_COMPACTION
>  TRACE_EVENT(compact_retry,
>  
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 3239dcdba4d7..af40a1b750fa 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -1235,6 +1235,7 @@ bool out_of_memory(struct oom_control *oc)
>  	select_bad_process(oc);
>  	/* Found nothing?!?! */
>  	if (!oc->chosen) {
> +		trace_select_bad_process_end(oc);
>  		dump_header(oc, NULL);
>  		pr_warn("Out of memory and no killable processes...\n");
>  		/*