From nobody Fri Apr 10 14:23:00 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 218CDECAAA1 for ; Tue, 6 Sep 2022 20:10:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231230AbiIFUKm (ORCPT ); Tue, 6 Sep 2022 16:10:42 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:55358 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230087AbiIFUKP (ORCPT ); Tue, 6 Sep 2022 16:10:15 -0400 Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 35E3DBC80F; Tue, 6 Sep 2022 13:05:14 -0700 (PDT) Received: from pwmachine.numericable.fr (85-170-34-72.rev.numericable.fr [85.170.34.72]) by linux.microsoft.com (Postfix) with ESMTPSA id C7ACA20B929C; Tue, 6 Sep 2022 12:58:21 -0700 (PDT) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com C7ACA20B929C DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1662494306; bh=Y3tZA2UfXIUdO651eZpQHa6bop0UrWXdjXGmdZD7XUw=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=I0ImiMk2OSzmHAF9EcIea/Iv6FlA7LB/hWbPy9LpmWBxA/OWCrauZk3R6t9VWJC0j 3itEgHNH7uQHYpyP7WU01yZH1+GyGnywJA1qfj5ePpesxIZc6DIPD0paS1A2iERyfe wJx8sQQLjg4QvCTa3T51mNdXaCtjSca+V+FF1wrE= From: Francis Laniel To: bpf@vger.kernel.org Cc: Francis Laniel , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Jonathan Corbet , Mykola Lysenko , Shuah Khan , Joanne Koong , Dave Marchevsky , Lorenzo Bianconi , Maxim Mikityanskiy , Geliang Tang , "Naveen N. 
Rao" , linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org Subject: [RFC PATCH v2 1/5] bpf: Make ring buffer overwritable. Date: Tue, 6 Sep 2022 21:56:42 +0200 Message-Id: <20220906195656.33021-2-flaniel@linux.microsoft.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20220906195656.33021-1-flaniel@linux.microsoft.com> References: <20220906195656.33021-1-flaniel@linux.microsoft.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" By default, BPF ring buffers are size-bounded: when producers have already filled the buffer, they need to wait for the consumer to read that data before adding new data. In terms of API, bpf_ringbuf_reserve() returns NULL if the buffer is full. This patch permits making a BPF ring buffer overwritable. When producers have already written as much data as the buffer size, they will begin to overwrite existing data, so the oldest data will be replaced. As a result, bpf_ringbuf_reserve() never returns NULL. To avoid memory consumption, this patch writes data backward, like the overwritable perf ring buffer added in commit 9ecda41acb97 ("perf/core: Add ::write_backward attribute to perf event"). Signed-off-by: Francis Laniel --- include/uapi/linux/bpf.h | 3 +++ kernel/bpf/ringbuf.c | 43 ++++++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 59a217ca2dfd..c87a667649ab 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1227,6 +1227,9 @@ enum { =20 /* Create a map that is suitable to be an inner map with dynamic max entri= es */ BPF_F_INNER_MAP =3D (1U << 12), + +/* Create an overwritable BPF_RINGBUF */ + BFP_F_RB_OVERWRITABLE =3D (1U << 13), }; =20 /* Flags for BPF_PROG_QUERY. 
*/ diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index ded4faeca192..369c61cfe8aa 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -12,7 +12,7 @@ #include #include =20 -#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE) +#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE | BFP_F_RB_OVERWRITABLE) =20 /* non-mmap()'able part of bpf_ringbuf (everything up to consumer page) */ #define RINGBUF_PGOFF \ @@ -37,6 +37,8 @@ struct bpf_ringbuf { u64 mask; struct page **pages; int nr_pages; + __u8 overwritable: 1, + __reserved: 7; spinlock_t spinlock ____cacheline_aligned_in_smp; /* Consumer and producer counters are put into separate pages to allow * mapping consumer page as r/w, but restrict producer page to r/o. @@ -127,7 +129,12 @@ static void bpf_ringbuf_notify(struct irq_work *work) wake_up_all(&rb->waitq); } =20 -static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) +static inline bool is_overwritable(struct bpf_ringbuf *rb) +{ + return !!rb->overwritable; +} + +static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node= , __u32 flags) { struct bpf_ringbuf *rb; =20 @@ -142,6 +149,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t dat= a_sz, int numa_node) rb->mask =3D data_sz - 1; rb->consumer_pos =3D 0; rb->producer_pos =3D 0; + rb->overwritable =3D !!(flags & BFP_F_RB_OVERWRITABLE); =20 return rb; } @@ -170,7 +178,7 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr= *attr) =20 bpf_map_init_from_attr(&rb_map->map, attr); =20 - rb_map->rb =3D bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node= ); + rb_map->rb =3D bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node= , attr->map_flags); if (!rb_map->rb) { kfree(rb_map); return ERR_PTR(-ENOMEM); @@ -248,6 +256,7 @@ static unsigned long ringbuf_avail_data_sz(struct bpf_r= ingbuf *rb) =20 cons_pos =3D smp_load_acquire(&rb->consumer_pos); prod_pos =3D smp_load_acquire(&rb->producer_pos); + return prod_pos - cons_pos; } =20 @@ -325,14 
+334,24 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf= *rb, u64 size) } =20 prod_pos =3D rb->producer_pos; - new_prod_pos =3D prod_pos + len; =20 - /* check for out of ringbuf space by ensuring producer position - * doesn't advance more than (ringbuf_size - 1) ahead - */ - if (new_prod_pos - cons_pos > rb->mask) { - spin_unlock_irqrestore(&rb->spinlock, flags); - return NULL; + if (!is_overwritable(rb)) { + new_prod_pos =3D prod_pos + len; + + /* check for out of ringbuf space by ensuring producer position + * doesn't advance more than (ringbuf_size - 1) ahead + */ + if (new_prod_pos - cons_pos > rb->mask) { + spin_unlock_irqrestore(&rb->spinlock, flags); + return NULL; + } + } else { + /* + * With overwritable ring buffer we go from the end toward the + * beginning. + */ + prod_pos -=3D len; + new_prod_pos =3D prod_pos; } =20 hdr =3D (void *)rb->data + (prod_pos & rb->mask); @@ -457,10 +476,14 @@ BPF_CALL_2(bpf_ringbuf_query, struct bpf_map *, map, = u64, flags) =20 switch (flags) { case BPF_RB_AVAIL_DATA: + if (is_overwritable(rb)) + return -EINVAL; return ringbuf_avail_data_sz(rb); case BPF_RB_RING_SIZE: return rb->mask + 1; case BPF_RB_CONS_POS: + if (is_overwritable(rb)) + return -EINVAL; return smp_load_acquire(&rb->consumer_pos); case BPF_RB_PROD_POS: return smp_load_acquire(&rb->producer_pos); --=20 2.25.1