include/linux/cgroup-defs.h | 16 ++++++++++------ include/linux/cgroup.h | 8 +++----- kernel/cgroup/cgroup.c | 7 +++---- net/netfilter/nft_socket.c | 9 +++++---- tools/perf/util/bpf_skel/bperf_cgroup.bpf.c | 2 +- 5 files changed, 22 insertions(+), 20 deletions(-)
Every cgroup knows all its ancestors through its ->ancestor_ids[]. There's
no advantage to remembering the IDs instead of the pointers directly and
this makes the array useless for finding an actual ancestor cgroup forcing
cgroup_ancestor() to iteratively walk up the hierarchy instead. Let's
replace cgroup->ancestor_ids[] with ->ancestors[] and remove the walking-up
from cgroup_ancestor().
While at it, improve comments around cgroup_root->cgrp_ancestor_storage.
This patch shouldn't cause user-visible behavior differences.
v2: Update cgroup_ancestor() to use ->ancestors[].
v3: cgroup_root->cgrp_ancestor_storage's type is updated to match
cgroup->ancestors[]. Better comments.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
---
include/linux/cgroup-defs.h | 16 ++++++++++------
include/linux/cgroup.h | 8 +++-----
kernel/cgroup/cgroup.c | 7 +++----
net/netfilter/nft_socket.c | 9 +++++----
tools/perf/util/bpf_skel/bperf_cgroup.bpf.c | 2 +-
5 files changed, 22 insertions(+), 20 deletions(-)
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 63bf43c7ca3b..52a3c47c89bc 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -379,7 +379,7 @@ struct cgroup {
/*
* The depth this cgroup is at. The root is at depth zero and each
* step down the hierarchy increments the level. This along with
- * ancestor_ids[] can determine whether a given cgroup is a
+ * ancestors[] can determine whether a given cgroup is a
* descendant of another without traversing the hierarchy.
*/
int level;
@@ -499,8 +499,8 @@ struct cgroup {
/* Used to store internal freezer state */
struct cgroup_freezer_state freezer;
- /* ids of the ancestors at each level including self */
- u64 ancestor_ids[];
+ /* All ancestors including self */
+ struct cgroup *ancestors[];
};
/*
@@ -517,11 +517,15 @@ struct cgroup_root {
/* Unique id for this hierarchy. */
int hierarchy_id;
- /* The root cgroup. Root is destroyed on its release. */
+ /*
+ * The root cgroup. The containing cgroup_root will be destroyed on its
+ * release. cgrp->ancestors[0] will be used overflowing into the
+ * following field. cgrp_ancestor_storage must immediately follow.
+ */
struct cgroup cgrp;
- /* for cgrp->ancestor_ids[0] */
- u64 cgrp_ancestor_id_storage;
+ /* must follow cgrp for cgrp->ancestors[0], see above */
+ struct cgroup *cgrp_ancestor_storage;
/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
atomic_t nr_cgrps;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed53bfe7c46c..4d143729b246 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -574,7 +574,7 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
{
if (cgrp->root != ancestor->root || cgrp->level < ancestor->level)
return false;
- return cgrp->ancestor_ids[ancestor->level] == cgroup_id(ancestor);
+ return cgrp->ancestors[ancestor->level] == ancestor;
}
/**
@@ -591,11 +591,9 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
int ancestor_level)
{
- if (cgrp->level < ancestor_level)
+ if (ancestor_level < 0 || ancestor_level > cgrp->level)
return NULL;
- while (cgrp && cgrp->level > ancestor_level)
- cgrp = cgroup_parent(cgrp);
- return cgrp;
+ return cgrp->ancestors[ancestor_level];
}
/**
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 85fa4c8587a8..ce587fe43dab 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2047,7 +2047,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
}
root_cgrp->kn = kernfs_root_to_node(root->kf_root);
WARN_ON_ONCE(cgroup_ino(root_cgrp) != 1);
- root_cgrp->ancestor_ids[0] = cgroup_id(root_cgrp);
+ root_cgrp->ancestors[0] = root_cgrp;
ret = css_populate_dir(&root_cgrp->self);
if (ret)
@@ -5391,8 +5391,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
int ret;
/* allocate the cgroup and its ID, 0 is reserved for the root */
- cgrp = kzalloc(struct_size(cgrp, ancestor_ids, (level + 1)),
- GFP_KERNEL);
+ cgrp = kzalloc(struct_size(cgrp, ancestors, (level + 1)), GFP_KERNEL);
if (!cgrp)
return ERR_PTR(-ENOMEM);
@@ -5444,7 +5443,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
spin_lock_irq(&css_set_lock);
for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
- cgrp->ancestor_ids[tcgrp->level] = cgroup_id(tcgrp);
+ cgrp->ancestors[tcgrp->level] = tcgrp;
if (tcgrp != cgrp) {
tcgrp->nr_descendants++;
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 05ae5a338b6f..d982a7c22a77 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -40,16 +40,17 @@ static noinline bool
nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level)
{
struct cgroup *cgrp;
+ u64 cgid;
if (!sk_fullsock(sk))
return false;
- cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- if (level > cgrp->level)
+ cgrp = cgroup_ancestor(sock_cgroup_ptr(&sk->sk_cgrp_data), level);
+ if (!cgrp)
return false;
- memcpy(dest, &cgrp->ancestor_ids[level], sizeof(u64));
-
+ cgid = cgroup_id(cgrp);
+ memcpy(dest, &cgid, sizeof(u64));
return true;
}
#endif
diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
index 292c430768b5..bd6a420acc8f 100644
--- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
@@ -68,7 +68,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
break;
// convert cgroup-id to a map index
- cgrp_id = BPF_CORE_READ(cgrp, ancestor_ids[i]);
+ cgrp_id = BPF_CORE_READ(cgrp, ancestors[i], kn, id);
elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
if (!elem)
continue;
On Fri, Jul 29, 2022 at 01:10:16PM -1000, Tejun Heo wrote: > Every cgroup knows all its ancestors through its ->ancestor_ids[]. There's > no advantage to remembering the IDs instead of the pointers directly and > this makes the array useless for finding an actual ancestor cgroup forcing > cgroup_ancestor() to iteratively walk up the hierarchy instead. Let's > replace cgroup->ancestor_ids[] with ->ancestors[] and remove the walking-up > from cgroup_ancestor(). > > While at it, improve comments around cgroup_root->cgrp_ancestor_storage. > > This patch shouldn't cause user-visible behavior differences. > > v2: Update cgroup_ancestor() to use ->ancestors[]. > > v3: cgroup_root->cgrp_ancestor_storage's type is updated to match > cgroup->ancestors[]. Better comments. > > Signed-off-by: Tejun Heo <tj@kernel.org> > Acked-by: Namhyung Kim <namhyung@kernel.org> Applied to cgroup/for-6.1. -- tejun
Hi Tejun, On Mon, Aug 15, 2022 at 2:17 PM Tejun Heo <tj@kernel.org> wrote: > > On Fri, Jul 29, 2022 at 01:10:16PM -1000, Tejun Heo wrote: > > Every cgroup knows all its ancestors through its ->ancestor_ids[]. There's > > no advantage to remembering the IDs instead of the pointers directly and > > this makes the array useless for finding an actual ancestor cgroup forcing > > cgroup_ancestor() to iteratively walk up the hierarchy instead. Let's > > replace cgroup->ancestor_ids[] with ->ancestors[] and remove the walking-up > > from cgroup_ancestor(). > > > > While at it, improve comments around cgroup_root->cgrp_ancestor_storage. > > > > This patch shouldn't cause user-visible behavior differences. > > > > v2: Update cgroup_ancestor() to use ->ancestors[]. > > > > v3: cgroup_root->cgrp_ancestor_storage's type is updated to match > > cgroup->ancestors[]. Better comments. > > > > Signed-off-by: Tejun Heo <tj@kernel.org> > > Acked-by: Namhyung Kim <namhyung@kernel.org> > > Applied to cgroup/for-6.1. I've realized that perf stat change needs backward compatibility. Will send a fix soon. Thanks, Namhyung
The recent change in the cgroup will break the backward compatibility in
the BPF program. It should support both old and new kernels using BPF
CO-RE technique.
Like the task_struct->__state handling in the offcpu analysis, we can
check the field name in the cgroup struct.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
Arnaldo, I think this should go through the cgroup tree since it depends
on the earlier change there. I don't think it'd conflict with other
perf changes but please let me know if you see any trouble, thanks!
tools/perf/util/bpf_skel/bperf_cgroup.bpf.c | 29 ++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
index 488bd398f01d..4fe61043de04 100644
--- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
@@ -43,12 +43,39 @@ struct {
__uint(value_size, sizeof(struct bpf_perf_event_value));
} cgrp_readings SEC(".maps");
+/* new kernel cgroup definition */
+struct cgroup___new {
+ int level;
+ struct cgroup *ancestors[];
+} __attribute__((preserve_access_index));
+
+/* old kernel cgroup definition */
+struct cgroup___old {
+ int level;
+ u64 ancestor_ids[];
+} __attribute__((preserve_access_index));
+
const volatile __u32 num_events = 1;
const volatile __u32 num_cpus = 1;
int enabled = 0;
int use_cgroup_v2 = 0;
+static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
+{
+ /* recast pointer to capture new type for compiler */
+ struct cgroup___new *cgrp_new = (void *)cgrp;
+
+ if (bpf_core_field_exists(cgrp_new->ancestors)) {
+ return BPF_CORE_READ(cgrp_new, ancestors[level], kn, id);
+ } else {
+ /* recast pointer to capture old type for compiler */
+ struct cgroup___old *cgrp_old = (void *)cgrp;
+
+ return BPF_CORE_READ(cgrp_old, ancestor_ids[level]);
+ }
+}
+
static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
{
struct task_struct *p = (void *)bpf_get_current_task();
@@ -70,7 +97,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
break;
// convert cgroup-id to a map index
- cgrp_id = BPF_CORE_READ(cgrp, ancestors[i], kn, id);
+ cgrp_id = get_cgroup_v1_ancestor_id(cgrp, i);
elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
if (!elem)
continue;
--
2.37.3.968.ga6b4b080e4-goog
On Wed, Sep 21, 2022 at 09:14:35PM -0700, Namhyung Kim wrote: > The recent change in the cgroup will break the backward compatiblity in > the BPF program. It should support both old and new kernels using BPF > CO-RE technique. > > Like the task_struct->__state handling in the offcpu analysis, we can > check the field name in the cgroup struct. > > Signed-off-by: Namhyung Kim <namhyung@kernel.org> Looks like it's acked enough but the patch doesn't apply anymore. Namhyung, can you please refresh the patch? I'll route this through cgroup/for-6.1-fixes unless somebody objects. Thanks. -- tejun
Hi Tejun, On Mon, Oct 10, 2022 at 4:59 PM Tejun Heo <tj@kernel.org> wrote: > > On Wed, Sep 21, 2022 at 09:14:35PM -0700, Namhyung Kim wrote: > > The recent change in the cgroup will break the backward compatiblity in > > the BPF program. It should support both old and new kernels using BPF > > CO-RE technique. > > > > Like the task_struct->__state handling in the offcpu analysis, we can > > check the field name in the cgroup struct. > > > > Signed-off-by: Namhyung Kim <namhyung@kernel.org> > > Looks like it's acked enough but the patch doesn't apply anymore. Namhyung, > can you please refresh the patch? I'll route this through > cgroup/for-6.1-fixes unless somebody objects. Sorry about the conflict. There was another change to get the perf cgroup subsys id properly. Will send v2 soon. Thanks, Namhyung
The recent change in the cgroup will break the backward compatibility in
the BPF program. It should support both old and new kernels using BPF
CO-RE technique.
Like the task_struct->__state handling in the offcpu analysis, we can
check the field name in the cgroup struct.
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/util/bpf_skel/bperf_cgroup.bpf.c | 29 ++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
index 435a87556688..6a438e0102c5 100644
--- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
@@ -43,6 +43,18 @@ struct {
__uint(value_size, sizeof(struct bpf_perf_event_value));
} cgrp_readings SEC(".maps");
+/* new kernel cgroup definition */
+struct cgroup___new {
+ int level;
+ struct cgroup *ancestors[];
+} __attribute__((preserve_access_index));
+
+/* old kernel cgroup definition */
+struct cgroup___old {
+ int level;
+ u64 ancestor_ids[];
+} __attribute__((preserve_access_index));
+
const volatile __u32 num_events = 1;
const volatile __u32 num_cpus = 1;
@@ -50,6 +62,21 @@ int enabled = 0;
int use_cgroup_v2 = 0;
int perf_subsys_id = -1;
+static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
+{
+ /* recast pointer to capture new type for compiler */
+ struct cgroup___new *cgrp_new = (void *)cgrp;
+
+ if (bpf_core_field_exists(cgrp_new->ancestors)) {
+ return BPF_CORE_READ(cgrp_new, ancestors[level], kn, id);
+ } else {
+ /* recast pointer to capture old type for compiler */
+ struct cgroup___old *cgrp_old = (void *)cgrp;
+
+ return BPF_CORE_READ(cgrp_old, ancestor_ids[level]);
+ }
+}
+
static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
{
struct task_struct *p = (void *)bpf_get_current_task();
@@ -77,7 +104,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
break;
// convert cgroup-id to a map index
- cgrp_id = BPF_CORE_READ(cgrp, ancestors[i], kn, id);
+ cgrp_id = get_cgroup_v1_ancestor_id(cgrp, i);
elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
if (!elem)
continue;
--
2.38.0.rc1.362.ged0d419d3c-goog
On Mon, Oct 10, 2022 at 10:28:08PM -0700, Namhyung Kim wrote: > The recent change in the cgroup will break the backward compatiblity in > the BPF program. It should support both old and new kernels using BPF > CO-RE technique. > > Like the task_struct->__state handling in the offcpu analysis, we can > check the field name in the cgroup struct. > > Acked-by: Jiri Olsa <jolsa@kernel.org> > Acked-by: Andrii Nakryiko <andrii@kernel.org> > Signed-off-by: Namhyung Kim <namhyung@kernel.org> Applied to cgroup/for-6.1-fixes. Thanks. -- tejun
Em Tue, Oct 11, 2022 at 06:53:43AM -1000, Tejun Heo escreveu: > On Mon, Oct 10, 2022 at 10:28:08PM -0700, Namhyung Kim wrote: > > The recent change in the cgroup will break the backward compatiblity in > > the BPF program. It should support both old and new kernels using BPF > > CO-RE technique. > > Like the task_struct->__state handling in the offcpu analysis, we can > > check the field name in the cgroup struct. > > Acked-by: Jiri Olsa <jolsa@kernel.org> > > Acked-by: Andrii Nakryiko <andrii@kernel.org> > > Signed-off-by: Namhyung Kim <namhyung@kernel.org> > Applied to cgroup/for-6.1-fixes. Hey, I noticed that the perf build is broken for the tools/perf/util/bpf_skel/bperf_cgroup.bpf.c skell, so I tried using b4 on this Namhyung patch, it ended up getting a newer version, by Tejun, that mixes up kernel code and tooling, which, when I tried to apply upstream didn't work. Please try not to mix up kernel and tools/ changes in the same patch to avoid these issues. Also when changing tools/perf, please CC me. I'm now back trying to apply v2 of this patch to see if it fixes my build. Thanks, - Arnaldo
On Fri, Oct 14, 2022 at 10:27:40AM -0300, Arnaldo Carvalho de Melo wrote: > Hey, I noticed that the perf build is broken for the > tools/perf/util/bpf_skel/bperf_cgroup.bpf.c skell, so I tried using b4 > on this Namhyung patch, it ended up getting a newer version, by Tejun, > that mixes up kernel code and tooling, which, when I tried to apply > upstream didn't work. > > Please try not to mix up kernel and tools/ changes in the same patch to > avoid these issues. I didn't write a newer version of this patch. What are you talking about? -- tejun
Em Fri, Oct 14, 2022 at 06:40:56AM -1000, Tejun Heo escreveu:
> On Fri, Oct 14, 2022 at 10:27:40AM -0300, Arnaldo Carvalho de Melo wrote:
> > Hey, I noticed that the perf build is broken for the
> > tools/perf/util/bpf_skel/bperf_cgroup.bpf.c skell, so I tried using b4
> > on this Namhyung patch, it ended up getting a newer version, by Tejun,
> > that mixes up kernel code and tooling, which, when I tried to apply
> > upstream didn't work.
> > Please try not to mix up kernel and tools/ changes in the same patch to
> > avoid these issues.
> I didn't write a newer version of this patch. What are you talking about?
So, I saw this message from you in reply to Namhyung's v2 patch:
--------------------------
Date: Tue, 11 Oct 2022 06:53:43 -1000
From: Tejun Heo <tj@kernel.org>
Subject: Re: [PATCH v2] perf stat: Support old kernels for bperf cgroup counting
To: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>, Zefan Li <lizefan.x@bytedance.com>, Johannes Weiner <hannes@cmpxchg.org>, cgroups@vger.kernel.org, Jiri Olsa <jolsa@kernel.org>, LKML <linux-kernel@vger.kernel.org>, linux-perf-users@vger.kernel.org, Song Liu
<songliubraving@fb.com>, bpf@vger.kernel.org, Andrii Nakryiko <andrii@kernel.org>
Sender: Tejun Heo <htejun@gmail.com>
Message-ID: <Y0Wfl88objrECjSo@slm.duckdns.org>
On Mon, Oct 10, 2022 at 10:28:08PM -0700, Namhyung Kim wrote:
> The recent change in the cgroup will break the backward compatiblity in
> the BPF program. It should support both old and new kernels using BPF
> CO-RE technique.
> Like the task_struct->__state handling in the offcpu analysis, we can
> check the field name in the cgroup struct.
> Acked-by: Jiri Olsa <jolsa@kernel.org>
> Acked-by: Andrii Nakryiko <andrii@kernel.org>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Applied to cgroup/for-6.1-fixes.
Thanks.
--
tejun
--------------------------
So, I picked the message id, Y0Wfl88objrECjSo@slm.duckdns.org, and asked
b4 to pick the patch:
⬢[acme@toolbox perf]$ b4 am --help | grep -A1 -- -c,
-c, --check-newer-revisions
Check if newer patch revisions exist
⬢[acme@toolbox perf]$
⬢[acme@toolbox perf]$ b4 am -ctsl --cc-trailers Y0Wfl88objrECjSo@slm.duckdns.org
Grabbing thread from lore.kernel.org/all/Y0Wfl88objrECjSo%40slm.duckdns.org/t.mbox.gz
Checking for newer revisions on https://lore.kernel.org/all/
Analyzing 27 messages in the thread
('Acked-by', 'Andrii Nakryiko <andrii@kernel.org>', None)
Will use the latest revision: v3
You can pick other revisions using the -vN flag
Checking attestation on all messages, may take a moment...
---
✓ [PATCH v3] cgroup: Replace cgroup->ancestor_ids[] with ->ancestors[]
---
✓ Signed: DKIM/gmail.com (From: tj@kernel.org)
---
Total patches: 1
---
Link: https://lore.kernel.org/r/YuRo2PLFH6wLgEkm@slm.duckdns.org
Base: not specified
git am ./v3_20220729_tj_cgroup_replace_cgroup_ancestor_ids_with_ancestors.mbx
⬢[acme@toolbox perf]$
Which got me this:
⬢[acme@toolbox perf]$ diffstat ./v3_20220729_tj_cgroup_replace_cgroup_ancestor_ids_with_ancestors.mbx
include/linux/cgroup-defs.h | 16 ++++++++++------
include/linux/cgroup.h | 8 +++-----
kernel/cgroup/cgroup.c | 7 +++----
net/netfilter/nft_socket.c | 9 +++++----
tools/perf/util/bpf_skel/bperf_cgroup.bpf.c | 2 +-
5 files changed, 22 insertions(+), 20 deletions(-)
⬢[acme@toolbox perf]$
⬢[acme@toolbox perf]$ grep From: ./v3_20220729_tj_cgroup_replace_cgroup_ancestor_ids_with_ancestors.mbx
From: Tejun Heo <tj@kernel.org>
⬢[acme@toolbox perf]$
That mixes kernel and tools bits and touches
tools/perf/util/bpf_skel/bperf_cgroup.bpf.c, hence my request to add me
to the CC list for patches touching tools/perf/.
My assumption that it was a new patch was because b4 somehow got to
v3_20220729_tj_cgroup_replace_cgroup_ancestor_ids_with_ancestors,
which has v3 and touches the tools cgroup bpf skel.
So it seems b4 is confused somehow.
Hope this clarifies.
- Arnaldo
Em Fri, Oct 14, 2022 at 10:27:40AM -0300, Arnaldo Carvalho de Melo escreveu: > Em Tue, Oct 11, 2022 at 06:53:43AM -1000, Tejun Heo escreveu: > > On Mon, Oct 10, 2022 at 10:28:08PM -0700, Namhyung Kim wrote: > > > The recent change in the cgroup will break the backward compatiblity in > > > the BPF program. It should support both old and new kernels using BPF > > > CO-RE technique. > > > > Like the task_struct->__state handling in the offcpu analysis, we can > > > check the field name in the cgroup struct. > > > > Acked-by: Jiri Olsa <jolsa@kernel.org> > > > Acked-by: Andrii Nakryiko <andrii@kernel.org> > > > Signed-off-by: Namhyung Kim <namhyung@kernel.org> > > > Applied to cgroup/for-6.1-fixes. > > Hey, I noticed that the perf build is broken for the > tools/perf/util/bpf_skel/bperf_cgroup.bpf.c skell, so I tried using b4 > on this Namhyung patch, it ended up getting a newer version, by Tejun, > that mixes up kernel code and tooling, which, when I tried to apply > upstream didn't work. > > Please try not to mix up kernel and tools/ changes in the same patch to > avoid these issues. > > Also when changing tools/perf, please CC me. > > I'm now back trying to apply v2 of this patch to see if it fixes my > build. Yeah, applying just Namhyung's v2 patch gets perf back building, I'll keep it there while processing the other patches so that I can test them all together. - Arnaldo
On Wed, Sep 21, 2022 at 09:14:35PM -0700, Namhyung Kim wrote:
> The recent change in the cgroup will break the backward compatiblity in
> the BPF program. It should support both old and new kernels using BPF
> CO-RE technique.
>
> Like the task_struct->__state handling in the offcpu analysis, we can
> check the field name in the cgroup struct.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
lgtm
Acked-by: Jiri Olsa <jolsa@kernel.org>
jirka
> ---
> Arnaldo, I think this should go through the cgroup tree since it depends
> on the earlier change there. I don't think it'd conflict with other
> perf changes but please let me know if you see any trouble, thanks!
>
> tools/perf/util/bpf_skel/bperf_cgroup.bpf.c | 29 ++++++++++++++++++++-
> 1 file changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> index 488bd398f01d..4fe61043de04 100644
> --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> @@ -43,12 +43,39 @@ struct {
> __uint(value_size, sizeof(struct bpf_perf_event_value));
> } cgrp_readings SEC(".maps");
>
> +/* new kernel cgroup definition */
> +struct cgroup___new {
> + int level;
> + struct cgroup *ancestors[];
> +} __attribute__((preserve_access_index));
> +
> +/* old kernel cgroup definition */
> +struct cgroup___old {
> + int level;
> + u64 ancestor_ids[];
> +} __attribute__((preserve_access_index));
> +
> const volatile __u32 num_events = 1;
> const volatile __u32 num_cpus = 1;
>
> int enabled = 0;
> int use_cgroup_v2 = 0;
>
> +static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
> +{
> + /* recast pointer to capture new type for compiler */
> + struct cgroup___new *cgrp_new = (void *)cgrp;
> +
> + if (bpf_core_field_exists(cgrp_new->ancestors)) {
> + return BPF_CORE_READ(cgrp_new, ancestors[level], kn, id);
> + } else {
> + /* recast pointer to capture old type for compiler */
> + struct cgroup___old *cgrp_old = (void *)cgrp;
> +
> + return BPF_CORE_READ(cgrp_old, ancestor_ids[level]);
> + }
> +}
> +
> static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
> {
> struct task_struct *p = (void *)bpf_get_current_task();
> @@ -70,7 +97,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
> break;
>
> // convert cgroup-id to a map index
> - cgrp_id = BPF_CORE_READ(cgrp, ancestors[i], kn, id);
> + cgrp_id = get_cgroup_v1_ancestor_id(cgrp, i);
> elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
> if (!elem)
> continue;
> --
> 2.37.3.968.ga6b4b080e4-goog
>
On Wed, Sep 21, 2022 at 9:21 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> The recent change in the cgroup will break the backward compatiblity in
> the BPF program. It should support both old and new kernels using BPF
> CO-RE technique.
>
> Like the task_struct->__state handling in the offcpu analysis, we can
> check the field name in the cgroup struct.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> Arnaldo, I think this should go through the cgroup tree since it depends
> on the earlier change there. I don't think it'd conflict with other
> perf changes but please let me know if you see any trouble, thanks!
>
> tools/perf/util/bpf_skel/bperf_cgroup.bpf.c | 29 ++++++++++++++++++++-
> 1 file changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> index 488bd398f01d..4fe61043de04 100644
> --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> @@ -43,12 +43,39 @@ struct {
> __uint(value_size, sizeof(struct bpf_perf_event_value));
> } cgrp_readings SEC(".maps");
>
> +/* new kernel cgroup definition */
> +struct cgroup___new {
> + int level;
> + struct cgroup *ancestors[];
> +} __attribute__((preserve_access_index));
> +
> +/* old kernel cgroup definition */
> +struct cgroup___old {
> + int level;
> + u64 ancestor_ids[];
> +} __attribute__((preserve_access_index));
> +
> const volatile __u32 num_events = 1;
> const volatile __u32 num_cpus = 1;
>
> int enabled = 0;
> int use_cgroup_v2 = 0;
>
> +static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
> +{
> + /* recast pointer to capture new type for compiler */
> + struct cgroup___new *cgrp_new = (void *)cgrp;
> +
> + if (bpf_core_field_exists(cgrp_new->ancestors)) {
> + return BPF_CORE_READ(cgrp_new, ancestors[level], kn, id);
have you checked generated BPF code for this ancestors[level] access?
I'd expect CO-RE relocation for finding ancestors offset and then just
normal + level * 8 arithmetic, but would be nice to confirm. Apart
from this, looks good to me:
Acked-by: Andrii Nakryiko <andrii@kernel.org>
> + } else {
> + /* recast pointer to capture old type for compiler */
> + struct cgroup___old *cgrp_old = (void *)cgrp;
> +
> + return BPF_CORE_READ(cgrp_old, ancestor_ids[level]);
> + }
> +}
> +
> static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
> {
> struct task_struct *p = (void *)bpf_get_current_task();
> @@ -70,7 +97,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
> break;
>
> // convert cgroup-id to a map index
> - cgrp_id = BPF_CORE_READ(cgrp, ancestors[i], kn, id);
> + cgrp_id = get_cgroup_v1_ancestor_id(cgrp, i);
> elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
> if (!elem)
> continue;
> --
> 2.37.3.968.ga6b4b080e4-goog
>
Hello,
On Fri, Sep 30, 2022 at 3:48 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Sep 21, 2022 at 9:21 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > The recent change in the cgroup will break the backward compatiblity in
> > the BPF program. It should support both old and new kernels using BPF
> > CO-RE technique.
> >
> > Like the task_struct->__state handling in the offcpu analysis, we can
> > check the field name in the cgroup struct.
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> > Arnaldo, I think this should go through the cgroup tree since it depends
> > on the earlier change there. I don't think it'd conflict with other
> > perf changes but please let me know if you see any trouble, thanks!
> >
> > tools/perf/util/bpf_skel/bperf_cgroup.bpf.c | 29 ++++++++++++++++++++-
> > 1 file changed, 28 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> > index 488bd398f01d..4fe61043de04 100644
> > --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> > +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> > @@ -43,12 +43,39 @@ struct {
> > __uint(value_size, sizeof(struct bpf_perf_event_value));
> > } cgrp_readings SEC(".maps");
> >
> > +/* new kernel cgroup definition */
> > +struct cgroup___new {
> > + int level;
> > + struct cgroup *ancestors[];
> > +} __attribute__((preserve_access_index));
> > +
> > +/* old kernel cgroup definition */
> > +struct cgroup___old {
> > + int level;
> > + u64 ancestor_ids[];
> > +} __attribute__((preserve_access_index));
> > +
> > const volatile __u32 num_events = 1;
> > const volatile __u32 num_cpus = 1;
> >
> > int enabled = 0;
> > int use_cgroup_v2 = 0;
> >
> > +static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
> > +{
> > + /* recast pointer to capture new type for compiler */
> > + struct cgroup___new *cgrp_new = (void *)cgrp;
> > +
> > + if (bpf_core_field_exists(cgrp_new->ancestors)) {
> > + return BPF_CORE_READ(cgrp_new, ancestors[level], kn, id);
>
> have you checked generated BPF code for this ancestors[level] access?
> I'd expect CO-RE relocation for finding ancestors offset and then just
> normal + level * 8 arithmetic, but would be nice to confirm. Apart
> from this, looks good to me:
>
> Acked-by: Andrii Nakryiko <andrii@kernel.org>
Thanks for your review!
How can I check the generated code? Do you have something works with
skeletons or do I have to save the BPF object somehow during the build?
Thanks,
Namhyung
On Fri, Sep 30, 2022 at 7:31 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Hello,
>
> On Fri, Sep 30, 2022 at 3:48 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Wed, Sep 21, 2022 at 9:21 PM Namhyung Kim <namhyung@kernel.org> wrote:
> > >
> > > The recent change in the cgroup will break the backward compatiblity in
> > > the BPF program. It should support both old and new kernels using BPF
> > > CO-RE technique.
> > >
> > > Like the task_struct->__state handling in the offcpu analysis, we can
> > > check the field name in the cgroup struct.
> > >
> > > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > > ---
> > > Arnaldo, I think this should go through the cgroup tree since it depends
> > > on the earlier change there. I don't think it'd conflict with other
> > > perf changes but please let me know if you see any trouble, thanks!
> > >
> > > tools/perf/util/bpf_skel/bperf_cgroup.bpf.c | 29 ++++++++++++++++++++-
> > > 1 file changed, 28 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> > > index 488bd398f01d..4fe61043de04 100644
> > > --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> > > +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> > > @@ -43,12 +43,39 @@ struct {
> > > __uint(value_size, sizeof(struct bpf_perf_event_value));
> > > } cgrp_readings SEC(".maps");
> > >
> > > +/* new kernel cgroup definition */
> > > +struct cgroup___new {
> > > + int level;
> > > + struct cgroup *ancestors[];
> > > +} __attribute__((preserve_access_index));
> > > +
> > > +/* old kernel cgroup definition */
> > > +struct cgroup___old {
> > > + int level;
> > > + u64 ancestor_ids[];
> > > +} __attribute__((preserve_access_index));
> > > +
> > > const volatile __u32 num_events = 1;
> > > const volatile __u32 num_cpus = 1;
> > >
> > > int enabled = 0;
> > > int use_cgroup_v2 = 0;
> > >
> > > +static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
> > > +{
> > > + /* recast pointer to capture new type for compiler */
> > > + struct cgroup___new *cgrp_new = (void *)cgrp;
> > > +
> > > + if (bpf_core_field_exists(cgrp_new->ancestors)) {
> > > + return BPF_CORE_READ(cgrp_new, ancestors[level], kn, id);
> >
> > have you checked generated BPF code for this ancestors[level] access?
> > I'd expect CO-RE relocation for finding ancestors offset and then just
> > normal + level * 8 arithmetic, but would be nice to confirm. Apart
> > from this, looks good to me:
> >
> > Acked-by: Andrii Nakryiko <andrii@kernel.org>
>
> Thanks for your review!
>
> How can I check the generated code? Do you have something works with
> skeletons or do I have to save the BPF object somehow during the build?
>
skeleton is generated from ELF BPF object file. You can do
llvm-objdump -d <obj.bpf.o> to see instructions. Unfortunately you
can't see BPF CO-RE relocations this way, you'd have to use something
like my custom tool ([0]).
But anyways, I checked locally similar code pattern and I think it's
all good from BPF CO-RE perspective. I see appropriate relocations in
all the necessary places. So this should work.
Acked-by: Andrii Nakryiko <andrii@kernel.org>
[0] https://github.com/anakryiko/btfdump
> Thanks,
> Namhyung
On Wed, Sep 21, 2022 at 09:14:35PM -0700, Namhyung Kim wrote:
> The recent change in the cgroup will break the backward compatibility in
> the BPF program. It should support both old and new kernels using BPF
> CO-RE technique.
>
> Like the task_struct->__state handling in the offcpu analysis, we can
> check the field name in the cgroup struct.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> Arnaldo, I think this should go through the cgroup tree since it depends
> on the earlier change there. I don't think it'd conflict with other
> perf changes but please let me know if you see any trouble, thanks!
could you please paste the cgroup tree link?
thanks,
jirka
>
> tools/perf/util/bpf_skel/bperf_cgroup.bpf.c | 29 ++++++++++++++++++++-
> 1 file changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> index 488bd398f01d..4fe61043de04 100644
> --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
> @@ -43,12 +43,39 @@ struct {
> __uint(value_size, sizeof(struct bpf_perf_event_value));
> } cgrp_readings SEC(".maps");
>
> +/* new kernel cgroup definition */
> +struct cgroup___new {
> + int level;
> + struct cgroup *ancestors[];
> +} __attribute__((preserve_access_index));
> +
> +/* old kernel cgroup definition */
> +struct cgroup___old {
> + int level;
> + u64 ancestor_ids[];
> +} __attribute__((preserve_access_index));
> +
> const volatile __u32 num_events = 1;
> const volatile __u32 num_cpus = 1;
>
> int enabled = 0;
> int use_cgroup_v2 = 0;
>
> +static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
> +{
> + /* recast pointer to capture new type for compiler */
> + struct cgroup___new *cgrp_new = (void *)cgrp;
> +
> + if (bpf_core_field_exists(cgrp_new->ancestors)) {
> + return BPF_CORE_READ(cgrp_new, ancestors[level], kn, id);
> + } else {
> + /* recast pointer to capture old type for compiler */
> + struct cgroup___old *cgrp_old = (void *)cgrp;
> +
> + return BPF_CORE_READ(cgrp_old, ancestor_ids[level]);
> + }
> +}
> +
> static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
> {
> struct task_struct *p = (void *)bpf_get_current_task();
> @@ -70,7 +97,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
> break;
>
> // convert cgroup-id to a map index
> - cgrp_id = BPF_CORE_READ(cgrp, ancestors[i], kn, id);
> + cgrp_id = get_cgroup_v1_ancestor_id(cgrp, i);
> elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
> if (!elem)
> continue;
> --
> 2.37.3.968.ga6b4b080e4-goog
>
Hi Jiri, On Fri, Sep 30, 2022 at 2:44 PM Jiri Olsa <olsajiri@gmail.com> wrote: > > On Wed, Sep 21, 2022 at 09:14:35PM -0700, Namhyung Kim wrote: > > The recent change in the cgroup will break the backward compatiblity in > > the BPF program. It should support both old and new kernels using BPF > > CO-RE technique. > > > > Like the task_struct->__state handling in the offcpu analysis, we can > > check the field name in the cgroup struct. > > > > Signed-off-by: Namhyung Kim <namhyung@kernel.org> > > --- > > Arnaldo, I think this should go through the cgroup tree since it depends > > on the earlier change there. I don't think it'd conflict with other > > perf changes but please let me know if you see any trouble, thanks! > > could you please paste the cgroup tree link? Do you mean this? https://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git Thanks,. Namhyung
On Fri, Sep 30, 2022 at 02:56:40PM -0700, Namhyung Kim wrote: > Hi Jiri, > > On Fri, Sep 30, 2022 at 2:44 PM Jiri Olsa <olsajiri@gmail.com> wrote: > > > > On Wed, Sep 21, 2022 at 09:14:35PM -0700, Namhyung Kim wrote: > > > The recent change in the cgroup will break the backward compatiblity in > > > the BPF program. It should support both old and new kernels using BPF > > > CO-RE technique. > > > > > > Like the task_struct->__state handling in the offcpu analysis, we can > > > check the field name in the cgroup struct. > > > > > > Signed-off-by: Namhyung Kim <namhyung@kernel.org> > > > --- > > > Arnaldo, I think this should go through the cgroup tree since it depends > > > on the earlier change there. I don't think it'd conflict with other > > > perf changes but please let me know if you see any trouble, thanks! > > > > could you please paste the cgroup tree link? > > Do you mean this? > > https://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git yea, wanted to try that.. I cherry-picked the 7f203bc89eb6 ;-) thanks, jirka > > Thanks,. > Namhyung
On September 30, 2022 6:56:40 PM GMT-03:00, Namhyung Kim <namhyung@kernel.org> wrote: >Hi Jiri, > >On Fri, Sep 30, 2022 at 2:44 PM Jiri Olsa <olsajiri@gmail.com> wrote: >> >> On Wed, Sep 21, 2022 at 09:14:35PM -0700, Namhyung Kim wrote: >> > The recent change in the cgroup will break the backward compatiblity in >> > the BPF program. It should support both old and new kernels using BPF >> > CO-RE technique. >> > >> > Like the task_struct->__state handling in the offcpu analysis, we can >> > check the field name in the cgroup struct. >> > >> > Signed-off-by: Namhyung Kim <namhyung@kernel.org> >> > --- >> > Arnaldo, I think this should go through the cgroup tree since it depends >> > on the earlier change there. I don't think it'd conflict with other >> > perf changes but please let me know if you see any trouble, thanks! >> >> could you please paste the cgroup tree link? > >Do you mean this? > > https://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git > Which branch and cset in that tree does you perf skel depends on? - Arnaldo >Thanks,. >Namhyung
On Fri, Sep 30, 2022 at 3:00 PM Arnaldo Carvalho de Melo <arnaldo.melo@gmail.com> wrote: > > > > On September 30, 2022 6:56:40 PM GMT-03:00, Namhyung Kim <namhyung@kernel.org> wrote: > >Hi Jiri, > > > >On Fri, Sep 30, 2022 at 2:44 PM Jiri Olsa <olsajiri@gmail.com> wrote: > >> > >> On Wed, Sep 21, 2022 at 09:14:35PM -0700, Namhyung Kim wrote: > >> > The recent change in the cgroup will break the backward compatiblity in > >> > the BPF program. It should support both old and new kernels using BPF > >> > CO-RE technique. > >> > > >> > Like the task_struct->__state handling in the offcpu analysis, we can > >> > check the field name in the cgroup struct. > >> > > >> > Signed-off-by: Namhyung Kim <namhyung@kernel.org> > >> > --- > >> > Arnaldo, I think this should go through the cgroup tree since it depends > >> > on the earlier change there. I don't think it'd conflict with other > >> > perf changes but please let me know if you see any trouble, thanks! > >> > >> could you please paste the cgroup tree link? > > > >Do you mean this? > > > > https://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git > > > > > Which branch and cset in that tree does you perf skel depends on? I believe it's for-6.1 and the cset is in https://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/commit/?h=for-6.1&id=7f203bc89eb66d6afde7eae91347fc0352090cc3 Thanks, Namhyung
On Wed, Sep 21, 2022 at 09:14:35PM -0700, Namhyung Kim wrote: > The recent change in the cgroup will break the backward compatibility in > the BPF program. It should support both old and new kernels using BPF > CO-RE technique. > > Like the task_struct->__state handling in the offcpu analysis, we can > check the field name in the cgroup struct. > > Signed-off-by: Namhyung Kim <namhyung@kernel.org> > --- > Arnaldo, I think this should go through the cgroup tree since it depends > on the earlier change there. I don't think it'd conflict with other > perf changes but please let me know if you see any trouble, thanks! FWIW, looks fine to me and I'd be happy to route this through the cgroup tree once it gets acked. Thanks. -- tejun
On Fri, Sep 23, 2022 at 8:22 PM Tejun Heo <tj@kernel.org> wrote: > > On Wed, Sep 21, 2022 at 09:14:35PM -0700, Namhyung Kim wrote: > > The recent change in the cgroup will break the backward compatibility in > > the BPF program. It should support both old and new kernels using BPF > > CO-RE technique. > > > > Like the task_struct->__state handling in the offcpu analysis, we can > > check the field name in the cgroup struct. > > > > Signed-off-by: Namhyung Kim <namhyung@kernel.org> > > --- > > Arnaldo, I think this should go through the cgroup tree since it depends > > on the earlier change there. I don't think it'd conflict with other > > perf changes but please let me know if you see any trouble, thanks! > > FWIW, looks fine to me and I'd be happy to route this through the cgroup > tree once it gets acked. Thanks Tejun! Can any perf + bpf folks take a look?
© 2016 - 2026 Red Hat, Inc.