[PATCH] audit: add backlog high water mark metric

Ricardo Robaina posted 1 patch 3 hours ago
include/linux/audit.h      |  3 ++-
include/uapi/linux/audit.h |  2 ++
kernel/audit.c             | 32 ++++++++++++++++++++++++++++++++
3 files changed, 36 insertions(+), 1 deletion(-)
[PATCH] audit: add backlog high water mark metric
Posted by Ricardo Robaina 3 hours ago
Currently, determining the optimal `audit_backlog_limit` relies on
instantaneous polling of the queue size. This misses transient
micro-bursts, making it difficult for system administrators to know
if their queue is adequately sized or if they are at risk of
dropping events.

Introduce `backlog_max_depth`, a high-water mark metric
that tracks the maximum number of buffers in the audit queue since
the system was booted or the metric was last reset. To minimize
performance overhead in the fast-path, update the metric from
`__audit_log_end()` using a lockless cmpxchg loop.

Userspace can read-and-clear this metric by sending an `AUDIT_SET`
message with the `AUDIT_STATUS_BACKLOG_MAX_DEPTH` mask. To support
periodic telemetry polling (e.g., statsd, Prometheus), the reset
operation atomically returns the snapshot of the high-water mark
right before zeroing it, ensuring no peaks are lost between polls.

Link: https://github.com/linux-audit/audit-kernel/issues/63
Suggested-by: Steve Grubb <sgrubb@redhat.com>
Signed-off-by: Ricardo Robaina <rrobaina@redhat.com>
---
 include/linux/audit.h      |  3 ++-
 include/uapi/linux/audit.h |  2 ++
 kernel/audit.c             | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/include/linux/audit.h b/include/linux/audit.h
index d79218bf075a..53132b303c20 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -22,7 +22,8 @@
 			  AUDIT_STATUS_BACKLOG_LIMIT | \
 			  AUDIT_STATUS_BACKLOG_WAIT_TIME | \
 			  AUDIT_STATUS_LOST | \
-			  AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL)
+			  AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL | \
+			  AUDIT_STATUS_BACKLOG_MAX_DEPTH)
 
 #define AUDIT_INO_UNSET ((unsigned long)-1)
 #define AUDIT_DEV_UNSET ((dev_t)-1)
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index e8f5ce677df7..862ca93c0c31 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -355,6 +355,7 @@ enum {
 #define AUDIT_STATUS_BACKLOG_WAIT_TIME		0x0020
 #define AUDIT_STATUS_LOST			0x0040
 #define AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL	0x0080
+#define AUDIT_STATUS_BACKLOG_MAX_DEPTH		0x0100
 
 #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT	0x00000001
 #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME	0x00000002
@@ -486,6 +487,7 @@ struct audit_status {
 	__u32           backlog_wait_time_actual;/* time spent waiting while
 						  * message limit exceeded
 						  */
+	__u32		backlog_max_depth; /* message queue max depth */
 };
 
 struct audit_features {
diff --git a/kernel/audit.c b/kernel/audit.c
index e1d489bc2dff..256053cb6132 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -163,6 +163,9 @@ static struct sk_buff_head audit_retry_queue;
 /* queue msgs waiting for new auditd connection */
 static struct sk_buff_head audit_hold_queue;
 
+/* audit queue high water mark since last startup or reset */
+static atomic_t audit_backlog_max_depth __read_mostly = ATOMIC_INIT(0);
+
 /* queue servicing thread */
 static struct task_struct *kauditd_task;
 static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
@@ -1286,6 +1289,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 		s.backlog		   = skb_queue_len(&audit_queue);
 		s.feature_bitmap	   = AUDIT_FEATURE_BITMAP_ALL;
 		s.backlog_wait_time	   = audit_backlog_wait_time;
+		s.backlog_max_depth	   = atomic_read(&audit_backlog_max_depth);
 		s.backlog_wait_time_actual = atomic_read(&audit_backlog_wait_time_actual);
 		audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
 		break;
@@ -1399,6 +1403,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 			audit_log_config_change("backlog_wait_time_actual", 0, actual, 1);
 			return actual;
 		}
+		if (s.mask == AUDIT_STATUS_BACKLOG_MAX_DEPTH) {
+			u32 old_depth = atomic_xchg(&audit_backlog_max_depth, 0);
+
+			audit_log_config_change("backlog_max_depth", 0, old_depth, 1);
+			return old_depth;
+		}
 		break;
 	}
 	case AUDIT_GET_FEATURE:
@@ -2761,6 +2771,25 @@ int audit_signal_info(int sig, struct task_struct *t)
 	return audit_signal_info_syscall(t);
 }
 
+/*
+ * audit_update_backlog_max_depth - update the audit queue high water mark
+ *
+ * Raises audit_backlog_max_depth to the current audit_queue length with a
+ * lockless cmpxchg loop, so concurrent loggers cannot lower the recorded
+ * peak. Locals are int to match the int-based atomic_t helpers.
+ */
+static inline void audit_update_backlog_max_depth(void)
+{
+	int q_len = skb_queue_len(&audit_queue);
+	int q_max = atomic_read(&audit_backlog_max_depth);
+
+	/* a failed cmpxchg reloads q_max, so just re-test the condition */
+	while (unlikely(q_len > q_max)) {
+		if (atomic_try_cmpxchg(&audit_backlog_max_depth, &q_max, q_len))
+			break;
+	}
+}
+
 /**
  * __audit_log_end - enqueue one audit record
  * @skb: the buffer to send
@@ -2777,6 +2806,9 @@ static void __audit_log_end(struct sk_buff *skb)
 
 		/* queue the netlink packet */
 		skb_queue_tail(&audit_queue, skb);
+
+		/* update backlog high water mark */
+		audit_update_backlog_max_depth();
 	} else {
 		audit_log_lost("rate limit exceeded");
 		kfree_skb(skb);
-- 
2.53.0
Re: [PATCH] audit: add backlog high water mark metric
Posted by Steve Grubb an hour ago
On Monday, March 23, 2026 11:07:00 AM Eastern Daylight Time Ricardo Robaina 
wrote:
> Currently, determining the optimal `audit_backlog_limit` relies on
> instantaneous polling of the queue size. This misses transient
> micro-bursts, making it difficult for system administrators to know
> if their queue is adequately sized or if they are at risk of
> dropping events.
> 
> This patch introduces `backlog_max_depth`, a high-water mark metric
> that tracks the maximum number of buffers in the audit queue since
> the system was booted or the metric was last reset. To minimize
> performance overhead in the fast-path, the metric is updated using
> a lockless cmpxchg loop in `__audit_log_end()`.
> 
> Userspace can read-and-clear this metric by sending an `AUDIT_SET`
> message with the `AUDIT_STATUS_BACKLOG_MAX_DEPTH` mask. To support
> periodic telemetry polling (e.g., statsd, Prometheus), the reset
> operation atomically returns the snapshot of the high-water mark
> right before zeroing it, ensuring no peaks are lost between polls.

From a user space point of view, this looks good. User space support was
co-developed alongside this patch to ensure it works as advertised.

Acked-by: Steve Grubb <sgrubb@redhat.com>

-Steve

> Link: https://github.com/linux-audit/audit-kernel/issues/63
> Suggested-by: Steve Grubb <sgrubb@redhat.com>
> Signed-off-by: Ricardo Robaina <rrobaina@redhat.com>
> ---
>  include/linux/audit.h      |  3 ++-
>  include/uapi/linux/audit.h |  2 ++
>  kernel/audit.c             | 32 ++++++++++++++++++++++++++++++++
>  3 files changed, 36 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/audit.h b/include/linux/audit.h
> index d79218bf075a..53132b303c20 100644
> --- a/include/linux/audit.h
> +++ b/include/linux/audit.h
> @@ -22,7 +22,8 @@
>  			  AUDIT_STATUS_BACKLOG_LIMIT | \
>  			  AUDIT_STATUS_BACKLOG_WAIT_TIME | \
>  			  AUDIT_STATUS_LOST | \
> -			  AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL)
> +			  AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL | \
> +			  AUDIT_STATUS_BACKLOG_MAX_DEPTH)
> 
>  #define AUDIT_INO_UNSET ((unsigned long)-1)
>  #define AUDIT_DEV_UNSET ((dev_t)-1)
> diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> index e8f5ce677df7..862ca93c0c31 100644
> --- a/include/uapi/linux/audit.h
> +++ b/include/uapi/linux/audit.h
> @@ -355,6 +355,7 @@ enum {
>  #define AUDIT_STATUS_BACKLOG_WAIT_TIME		0x0020
>  #define AUDIT_STATUS_LOST			0x0040
>  #define AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL	0x0080
> +#define AUDIT_STATUS_BACKLOG_MAX_DEPTH		0x0100
> 
>  #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT	0x00000001
>  #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME	0x00000002
> @@ -486,6 +487,7 @@ struct audit_status {
>  	__u32           backlog_wait_time_actual;/* time spent waiting while
>  						  * message limit exceeded
>  						  */
> +	__u32		backlog_max_depth; /* message queue max depth */
>  };
> 
>  struct audit_features {
> diff --git a/kernel/audit.c b/kernel/audit.c
> index e1d489bc2dff..256053cb6132 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -163,6 +163,9 @@ static struct sk_buff_head audit_retry_queue;
>  /* queue msgs waiting for new auditd connection */
>  static struct sk_buff_head audit_hold_queue;
> 
> +/* audit queue high water mark since last startup or reset */
> +static atomic_t audit_backlog_max_depth __read_mostly = ATOMIC_INIT(0);
> +
>  /* queue servicing thread */
>  static struct task_struct *kauditd_task;
>  static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
> @@ -1286,6 +1289,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
>  		s.backlog		   = skb_queue_len(&audit_queue);
>  		s.feature_bitmap	   = AUDIT_FEATURE_BITMAP_ALL;
>  		s.backlog_wait_time	   = audit_backlog_wait_time;
> +		s.backlog_max_depth	   = atomic_read(&audit_backlog_max_depth);
>  		s.backlog_wait_time_actual = atomic_read(&audit_backlog_wait_time_actual);
>  		audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
>  		break;
> @@ -1399,6 +1403,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
>  			audit_log_config_change("backlog_wait_time_actual", 0, actual, 1);
>  			return actual;
>  		}
> +		if (s.mask == AUDIT_STATUS_BACKLOG_MAX_DEPTH) {
> +			u32 old_depth = atomic_xchg(&audit_backlog_max_depth, 0);
> +
> +			audit_log_config_change("backlog_max_depth", 0, old_depth, 1);
> +			return old_depth;
> +		}
>  		break;
>  	}
>  	case AUDIT_GET_FEATURE:
> @@ -2761,6 +2771,25 @@ int audit_signal_info(int sig, struct task_struct *t)
>  	return audit_signal_info_syscall(t);
>  }
> 
> +/*
> + * audit_update_backlog_max_depth - update the audit queue high water mark
> + *
> + * Safely updates the audit_backlog_max_depth metric using a lockless
> + * cmpxchg loop. This ensures the high-water mark is accurately tracked
> + * even when multiple CPUs are logging audit records concurrently.
> + */
> +static inline void audit_update_backlog_max_depth(void)
> +{
> +	u32 q_len = skb_queue_len(&audit_queue);
> +	u32 q_max = atomic_read(&audit_backlog_max_depth);
> +
> +	while (unlikely(q_len > q_max)) {
> +		if (likely(atomic_try_cmpxchg(&audit_backlog_max_depth,
> +					      &q_max, q_len)))
> +			break;
> +	}
> +}
> +
>  /**
>   * __audit_log_end - enqueue one audit record
>   * @skb: the buffer to send
> @@ -2777,6 +2806,9 @@ static void __audit_log_end(struct sk_buff *skb)
> 
>  		/* queue the netlink packet */
>  		skb_queue_tail(&audit_queue, skb);
> +
> +		/* update backlog high water mark */
> +		audit_update_backlog_max_depth();
>  	} else {
>  		audit_log_lost("rate limit exceeded");
>  		kfree_skb(skb);