Creation of all files in the resctrl file system is under control of
the file system layer.
But some resources may need to add a file to the info/{resource}
directory for debug purposes.
Add a new rdt_resource::info_file field for the resource to specify
show() and/or write() operations. These will be called with the
rdtgroup_mutex held.
Architecture can note the file is only for debug using by setting
the rftype::flags RFTYPE_DEBUG bit.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
include/linux/resctrl.h | 33 +++++++++++++++++++++++++++
fs/resctrl/internal.h | 31 ++-----------------------
fs/resctrl/rdtgroup.c | 50 ++++++++++++++++++++++++++++++++++++++---
3 files changed, 82 insertions(+), 32 deletions(-)
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index b7e15abcde23..e067007c633c 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -73,6 +73,37 @@ enum resctrl_conf_type {
#define CDP_NUM_TYPES (CDP_DATA + 1)
+/**
+ * struct rftype - describe each file in the resctrl file system
+ * @name: File name
+ * @mode: Access mode
+ * @kf_ops: File operations
+ * @flags: File specific RFTYPE_FLAGS_* flags
+ * @fflags: File specific RFTYPE_* flags
+ * @seq_show: Show content of the file
+ * @write: Write to the file
+ */
+struct rftype {
+ char *name;
+ umode_t mode;
+ const struct kernfs_ops *kf_ops;
+ unsigned long flags;
+ unsigned long fflags;
+
+ int (*seq_show)(struct kernfs_open_file *of,
+ struct seq_file *sf, void *v);
+ /*
+ * write() is the generic write callback which maps directly to
+ * kernfs write operation and overrides all other operations.
+ * Maximum write size is determined by ->max_write_len.
+ */
+ ssize_t (*write)(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off);
+};
+
+/* Only rftype::flags option available to architecture code */
+#define RFTYPE_DEBUG BIT(10)
+
/*
* struct pseudo_lock_region - pseudo-lock region information
* @s: Resctrl schema for the resource to which this
@@ -281,6 +312,7 @@ enum resctrl_schema_fmt {
* @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth
* monitoring events can be configured.
* @cdp_capable: Is the CDP feature available on this resource
+ * @info_file: Optional per-resource debug info file
*/
struct rdt_resource {
int rid;
@@ -297,6 +329,7 @@ struct rdt_resource {
enum resctrl_schema_fmt schema_fmt;
unsigned int mbm_cfg_mask;
bool cdp_capable;
+ struct rftype info_file;
};
/*
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index 1f4800cfcd6a..f13b63804c1a 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -232,7 +232,8 @@ struct rdtgroup {
#define RFTYPE_RES_MB BIT(9)
-#define RFTYPE_DEBUG BIT(10)
+// RFTYPE_DEBUG available to architecture code in <linux/resctrl.h>
+//#define RFTYPE_DEBUG BIT(10)
#define RFTYPE_RES_PERF_PKG BIT(11)
@@ -251,34 +252,6 @@ extern struct list_head rdt_all_groups;
extern int max_name_width;
-/**
- * struct rftype - describe each file in the resctrl file system
- * @name: File name
- * @mode: Access mode
- * @kf_ops: File operations
- * @flags: File specific RFTYPE_FLAGS_* flags
- * @fflags: File specific RFTYPE_* flags
- * @seq_show: Show content of the file
- * @write: Write to the file
- */
-struct rftype {
- char *name;
- umode_t mode;
- const struct kernfs_ops *kf_ops;
- unsigned long flags;
- unsigned long fflags;
-
- int (*seq_show)(struct kernfs_open_file *of,
- struct seq_file *sf, void *v);
- /*
- * write() is the generic write callback which maps directly to
- * kernfs write operation and overrides all other operations.
- * Maximum write size is determined by ->max_write_len.
- */
- ssize_t (*write)(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off);
-};
-
/**
* struct mbm_state - status for each MBM counter in each domain
* @prev_bw_bytes: Previous bytes value read for bandwidth calculation
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index e212e46e0780..f09674c209f8 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -329,6 +329,37 @@ static const struct kernfs_ops rdtgroup_kf_single_ops = {
.seq_show = rdtgroup_seqfile_show,
};
+static int rdtgroup_seqfile_show_locked(struct seq_file *m, void *arg)
+{
+ struct kernfs_open_file *of = m->private;
+ struct rftype *rft = of->kn->priv;
+
+ guard(mutex)(&rdtgroup_mutex);
+
+ if (rft->seq_show)
+ return rft->seq_show(of, m, arg);
+ return 0;
+}
+
+static ssize_t rdtgroup_file_write_locked(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ struct rftype *rft = of->kn->priv;
+
+ guard(mutex)(&rdtgroup_mutex);
+
+ if (rft->write)
+ return rft->write(of, buf, nbytes, off);
+
+ return -EINVAL;
+}
+
+static const struct kernfs_ops rdtgroup_kf_single_locked_ops = {
+ .atomic_write_len = PAGE_SIZE,
+ .write = rdtgroup_file_write_locked,
+ .seq_show = rdtgroup_seqfile_show_locked,
+};
+
static const struct kernfs_ops kf_mondata_ops = {
.atomic_write_len = PAGE_SIZE,
.seq_show = rdtgroup_mondata_show,
@@ -2162,7 +2193,7 @@ int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
return ret;
}
-static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
+static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, void *priv, char *name,
unsigned long fflags)
{
struct kernfs_node *kn_subdir;
@@ -2177,6 +2208,19 @@ static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
if (ret)
return ret;
+ if (r->info_file.name &&
+ (!(r->info_file.flags & RFTYPE_DEBUG) || resctrl_debug)) {
+ r->info_file.mode = 0;
+ if (r->info_file.seq_show)
+ r->info_file.mode |= 0444;
+ if (r->info_file.write)
+ r->info_file.mode |= 0200;
+ r->info_file.kf_ops = &rdtgroup_kf_single_locked_ops;
+ ret = rdtgroup_add_file(kn_subdir, &r->info_file);
+ if (ret)
+ return ret;
+ }
+
ret = rdtgroup_add_files(kn_subdir, fflags);
if (!ret)
kernfs_activate(kn_subdir);
@@ -2221,7 +2265,7 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
list_for_each_entry(s, &resctrl_schema_all, list) {
r = s->res;
fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO;
- ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
+ ret = rdtgroup_mkdir_info_resdir(r, s, s->name, fflags);
if (ret)
goto out_destroy;
}
@@ -2229,7 +2273,7 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
for_each_mon_capable_rdt_resource(r) {
fflags = fflags_from_resource(r) | RFTYPE_MON_INFO;
sprintf(name, "%s_MON", r->name);
- ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
+ ret = rdtgroup_mkdir_info_resdir(r, r, name, fflags);
if (ret)
goto out_destroy;
}
--
2.49.0
Hi Tony,
On 5/21/25 3:50 PM, Tony Luck wrote:
> Creation of all files in the resctrl file system is under control of
> the file system layer.
>
> But some resources may need to add a file to the info/{resource}
> directory for debug purposes.
>
> Add a new rdt_resource::info_file field for the resource to specify
> show() and/or write() operations. These will be called with the
> rdtgroup_mutex held.
>
> Architecture can note the file is only for debug using by setting
> the rftype::flags RFTYPE_DEBUG bit.
This needs to change. This punches a crater through the separation
between fs and arch that we worked hard to achieve. Please make an attempt
to do so as I am sure you can.
Reinette
On Tue, Jun 03, 2025 at 09:15:02PM -0700, Reinette Chatre wrote:
> Hi Tony,
>
> On 5/21/25 3:50 PM, Tony Luck wrote:
> > Creation of all files in the resctrl file system is under control of
> > the file system layer.
> >
> > But some resources may need to add a file to the info/{resource}
> > directory for debug purposes.
> >
> > Add a new rdt_resource::info_file field for the resource to specify
> > show() and/or write() operations. These will be called with the
> > rdtgroup_mutex held.
> >
> > Architecture can note the file is only for debug using by setting
> > the rftype::flags RFTYPE_DEBUG bit.
>
> This needs to change. This punches a crater through the separation
> between fs and arch that we worked hard to achieve. Please make an attempt
> to do so as I am sure you can.
The file I want to create here amy only be of interest in debugging
the telemetry h/w interface. So my next choice is debugfs.
But creation of the debugfs "resctrl" directory is done by file
system code and the debugfs_resctrl variable is only marked "extern" by
fs/resctrl/internal.h, so currently not accessible to architecture code.
Is that a deliberate choice? Would it be OK to make that visible to
architecture code to create files in /sys/kernel/debug/resctrl?
Or should I add my file in a new /sys/kernel/debug/x86/resctrl
directory?
-Tony
Hi Tony,
On 6/5/25 5:09 PM, Luck, Tony wrote:
> On Tue, Jun 03, 2025 at 09:15:02PM -0700, Reinette Chatre wrote:
>> Hi Tony,
>>
>> On 5/21/25 3:50 PM, Tony Luck wrote:
>>> Creation of all files in the resctrl file system is under control of
>>> the file system layer.
>>>
>>> But some resources may need to add a file to the info/{resource}
>>> directory for debug purposes.
>>>
>>> Add a new rdt_resource::info_file field for the resource to specify
>>> show() and/or write() operations. These will be called with the
>>> rdtgroup_mutex held.
>>>
>>> Architecture can note the file is only for debug using by setting
>>> the rftype::flags RFTYPE_DEBUG bit.
>>
>> This needs to change. This punches a crater through the separation
>> between fs and arch that we worked hard to achieve. Please make an attempt
>> to do so as I am sure you can.
>
> The file I want to create here amy only be of interest in debugging
> the telemetry h/w interface. So my next choice is debugfs.
I believe we can make either work but debugfs does indeed sound most
appropriate. I am not familiar with customs surrounding kernel support for
hardware debug interfaces though, so setting that aside for now.
>
> But creation of the debugfs "resctrl" directory is done by file
> system code and the debugfs_resctrl variable is only marked "extern" by
> fs/resctrl/internal.h, so currently not accessible to architecture code.
It would be best to stay this way.
>
> Is that a deliberate choice? Would it be OK to make that visible to
> architecture code to create files in /sys/kernel/debug/resctrl?
It should not be necessary to give an arch total control over
resctrl fs's debugfs.
>
> Or should I add my file in a new /sys/kernel/debug/x86/resctrl
> directory?
Since there is already a resctrl debugfs I think it will be less confusing
to keep all resctrl debug together within /sys/kernel/debug/resctrl.
There should be some bounds on what an arch can do here though.
There is already support for debugfs via the pseudo-locking
work where /sys/kernel/debug/resctrl contains directories with name
of resource group to provide debug for specific resource group. Giving
an arch total freedom on what can be created in /sys/kernel/debug/resctrl
thus runs the risk of exposing to corner cases where name of arch
debug cannot match resource group names and with ordering of these
directory creations it can become tricky.
With /sys/kernel/debug/resctrl potentially mirroring /sys/fs/resctrl to
support various debugging scenarios there may later be resource level
debugging for which a "/sys/kernel/debug/resctrl/info/<resource>/<debugfile>" can
be used. Considering this it looks to me as though one possible boundary could
be to isolate arch specific debug to, for example, a new directory named
"/sys/kernel/debug/resctrl/info/arch_debug_name_tbd/". By placing the
arch debug in a sub-directory named "info" it avoids collision with resource
group names with naming that also avoids collision with resource names since
all these names are controlled by resctrl fs.
To support this an architecture can request resctrl fs to create such directory
after resctrl_init() succeeds. Since it is custom to ignore errors for
debugfs dir creation the call is not expected to interfere with initialization
and existing arch initialization should not be impacted with this call
being after resctrl__init().
For example, for x86 it could be something like:
struct dentry *arch_priv_debug_fs_dir;
resctrl_arch_late_init(void) {
...
ret = resctrl_init();
if (ret) {
cpuhp_remove_state(state);
return ret;
}
rdt_online = state;
arch_priv_debug_fs_dir = resctrl_create_arch_debugfs(); /* all names up for improvement */
...
}
Note that the architecture does not pick the directory name.
On the resctrl fs side, resctrl_create_arch_debugfs() creates
"/sys/kernel/debug/resctrl/info/arch_debug_name_tbd", passing the
dentry back to the arch and with this gives the arch flexibility to create
new directories and files to match its debug requirements.
The arch has flexibility to manage the lifetimes of files/directories of
its debugfs area while resctrl fs still has the final control
with the debugfs_remove_recursive() of the top level /sys/kernel/debug/resctrl
on its exit.
What do you think?
It will need more work to support an arch-specific debug that is
connected to a resource or resource group, but this does not seem to be the
goal here. Even so, with a design like this it keeps the door open for
future resctrl fs and/or arch debug associated with resources and
resource groups.
Reinette
On Fri, Jun 06, 2025 at 09:26:06AM -0700, Reinette Chatre wrote:
> With /sys/kernel/debug/resctrl potentially mirroring /sys/fs/resctrl to
> support various debugging scenarios there may later be resource level
> debugging for which a "/sys/kernel/debug/resctrl/info/<resource>/<debugfile>" can
> be used. Considering this it looks to me as though one possible boundary could
> be to isolate arch specific debug to, for example, a new directory named
> "/sys/kernel/debug/resctrl/info/arch_debug_name_tbd/". By placing the
> arch debug in a sub-directory named "info" it avoids collision with resource
> group names with naming that also avoids collision with resource names since
> all these names are controlled by resctrl fs.
That seems like a good path. PoC patch below. Note that I put the dentry
for the debug info directory into struct rdt_resource. So no call from
architecture to file system code needed to access.
Directory layout looks like this:
# tree /sys/kernel/debug/resctrl/
/sys/kernel/debug/resctrl/
└── info
├── L2
├── L3
├── MB
└── SMBA
6 directories, 0 files
-Tony
---
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 5e28e81b35f6..78dd0f8f7ad8 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -281,6 +281,7 @@ enum resctrl_schema_fmt {
* @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth
* monitoring events can be configured.
* @cdp_capable: Is the CDP feature available on this resource
+ * @arch_debug_info: Debugfs info directory for architecture use
*/
struct rdt_resource {
int rid;
@@ -297,6 +298,7 @@ struct rdt_resource {
enum resctrl_schema_fmt schema_fmt;
unsigned int mbm_cfg_mask;
bool cdp_capable;
+ struct dentry *arch_debug_info;
};
/*
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index ed4fc45da346..48c587201fb6 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -4274,6 +4274,8 @@ void resctrl_offline_cpu(unsigned int cpu)
*/
int resctrl_init(void)
{
+ struct dentry *debuginfodir;
+ struct rdt_resource *r;
int ret = 0;
seq_buf_init(&last_cmd_status, last_cmd_status_buf,
@@ -4320,6 +4322,12 @@ int resctrl_init(void)
*/
debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
+ /* Create debug info directories for each resource */
+ debuginfodir = debugfs_create_dir("info", debugfs_resctrl);
+
+ for_each_rdt_resource(r)
+ r->arch_debug_info = debugfs_create_dir(r->name, debuginfodir);
+
return 0;
cleanup_mountpoint:
Hi Tony,
On 6/6/25 10:30 AM, Luck, Tony wrote:
> On Fri, Jun 06, 2025 at 09:26:06AM -0700, Reinette Chatre wrote:
>> With /sys/kernel/debug/resctrl potentially mirroring /sys/fs/resctrl to
>> support various debugging scenarios there may later be resource level
>> debugging for which a "/sys/kernel/debug/resctrl/info/<resource>/<debugfile>" can
>> be used. Considering this it looks to me as though one possible boundary could
>> be to isolate arch specific debug to, for example, a new directory named
>> "/sys/kernel/debug/resctrl/info/arch_debug_name_tbd/". By placing the
>> arch debug in a sub-directory named "info" it avoids collision with resource
>> group names with naming that also avoids collision with resource names since
>> all these names are controlled by resctrl fs.
>
>
> That seems like a good path. PoC patch below. Note that I put the dentry
> for the debug info directory into struct rdt_resource. So no call from
> architecture to file system code needed to access.
ok, reading between the lines there is now a switch to per-resource
requirement, which fits with the use.
>
> Directory layout looks like this:
>
> # tree /sys/kernel/debug/resctrl/
> /sys/kernel/debug/resctrl/
> └── info
> ├── L2
> ├── L3
> ├── MB
> └── SMBA
>
This looks like something that needs to be owned and managed by
resctrl fs (more below).
> 6 directories, 0 files
>
> -Tony
>
> ---
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 5e28e81b35f6..78dd0f8f7ad8 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -281,6 +281,7 @@ enum resctrl_schema_fmt {
> * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth
> * monitoring events can be configured.
> * @cdp_capable: Is the CDP feature available on this resource
> + * @arch_debug_info: Debugfs info directory for architecture use
> */
> struct rdt_resource {
> int rid;
> @@ -297,6 +298,7 @@ struct rdt_resource {
> enum resctrl_schema_fmt schema_fmt;
> unsigned int mbm_cfg_mask;
> bool cdp_capable;
> + struct dentry *arch_debug_info;
> };
ok ... but maybe not quite exactly (more below)
>
> /*
> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> index ed4fc45da346..48c587201fb6 100644
> --- a/fs/resctrl/rdtgroup.c
> +++ b/fs/resctrl/rdtgroup.c
> @@ -4274,6 +4274,8 @@ void resctrl_offline_cpu(unsigned int cpu)
> */
> int resctrl_init(void)
> {
> + struct dentry *debuginfodir;
> + struct rdt_resource *r;
> int ret = 0;
>
> seq_buf_init(&last_cmd_status, last_cmd_status_buf,
> @@ -4320,6 +4322,12 @@ int resctrl_init(void)
> */
> debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
>
> + /* Create debug info directories for each resource */
> + debuginfodir = debugfs_create_dir("info", debugfs_resctrl);
> +
> + for_each_rdt_resource(r)
> + r->arch_debug_info = debugfs_create_dir(r->name, debuginfodir);
This ignores (*) several of the boundaries my response aimed to establish.
Here are some red flags:
- This creates the resource named directory and hands off that pointer to the
arch. As I mentioned the arch should not have control over resctrl's debugfs.
I believe this is the type of information that should be in control of resctrl fs
since, as I mentioned, resctrl fs may need to add debugging that mirrors /sys/fs/resctrl.
- Blindly creating these directories (a) without the resource even existing on the
system, and (b) without being used/requested by the architecture does not create a good
interface in my opinion. User space will see a bunch of empty directories
associated with resources that are not present on the system.
- The directories created do not even match /sys/fs/resctrl/info when it comes
to the resources. Note that the directories within /sys/fs/resctrl/info are created
from the schema for control resources and appends _MON to monitor resources. Like
I mentioned in my earlier response there should ideally be space for a future
resctrl fs extension to mirror layout of /sys/fs/resctrl for resctrl fs debug
in debugfs. This solution ignores all of that.
I still think that the architecture should request the debugfs directory from resctrl fs.
This avoids resctrl fs needing to create directories/files that are never used and
does not present user space with an empty tree. Considering that the new PERF_PKG
resource may not come online until resctrl mount this should be something that can be
called at any time.
One possibility, that supports intended use while keeping the door open to support
future resctrl fs use of the debugfs, could be a new resctrl fs function,
for example resctrl_create_mon_resource_debugfs(struct rdt_resource *r), that will initialize
rdt_resource::arch_debug_info(*) to point to the dentry of newly created
/sys/kernel/debug/resctrl/info/<rdt_resource::name>_MON/arch_debug_name_TBD *if*
the associated resource is capable of monitoring ... or do you think an architecture
may want to add debugging information before a resource is discovered/enabled?
If doing this then rdt_resource::arch_debug_info is no longer appropriate since it needs
to be specific to the monitoring resource. Perhaps then rdt_resource::arch_mon_debugfs
that would eventually live in [1]?
This is feeling rushed and I am sharing some top of mind ideas. I will give this
more thought.
Reinette
[1] https://lore.kernel.org/lkml/cb8425c73f57280b0b4f22e089b2912eede42f7a.1747349530.git.babu.moger@amd.com/
(*) I have now asked several times to stop ignoring feedback. This should not even
be necessary in the first place. I do not require you to agree with me and I do not claim
to always be right, please just stop ignoring feedback. The way forward I plan to ignore
messages that ignores feedback.
On Fri, Jun 06, 2025 at 02:14:56PM -0700, Reinette Chatre wrote:
> Hi Tony,
>
> On 6/6/25 10:30 AM, Luck, Tony wrote:
> > On Fri, Jun 06, 2025 at 09:26:06AM -0700, Reinette Chatre wrote:
> >> With /sys/kernel/debug/resctrl potentially mirroring /sys/fs/resctrl to
> >> support various debugging scenarios there may later be resource level
> >> debugging for which a "/sys/kernel/debug/resctrl/info/<resource>/<debugfile>" can
> >> be used. Considering this it looks to me as though one possible boundary could
> >> be to isolate arch specific debug to, for example, a new directory named
> >> "/sys/kernel/debug/resctrl/info/arch_debug_name_tbd/". By placing the
> >> arch debug in a sub-directory named "info" it avoids collision with resource
> >> group names with naming that also avoids collision with resource names since
> >> all these names are controlled by resctrl fs.
> >
> >
> > That seems like a good path. PoC patch below. Note that I put the dentry
> > for the debug info directory into struct rdt_resource. So no call from
> > architecture to file system code needed to access.
>
> ok, reading between the lines there is now a switch to per-resource
> requirement, which fits with the use.
>
> >
> > Directory layout looks like this:
> >
> > # tree /sys/kernel/debug/resctrl/
> > /sys/kernel/debug/resctrl/
> > └── info
> > ├── L2
> > ├── L3
> > ├── MB
> > └── SMBA
> >
>
> This looks like something that needs to be owned and managed by
> resctrl fs (more below).
>
> > 6 directories, 0 files
> >
> > -Tony
> >
> > ---
> >
> > diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> > index 5e28e81b35f6..78dd0f8f7ad8 100644
> > --- a/include/linux/resctrl.h
> > +++ b/include/linux/resctrl.h
> > @@ -281,6 +281,7 @@ enum resctrl_schema_fmt {
> > * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth
> > * monitoring events can be configured.
> > * @cdp_capable: Is the CDP feature available on this resource
> > + * @arch_debug_info: Debugfs info directory for architecture use
> > */
> > struct rdt_resource {
> > int rid;
> > @@ -297,6 +298,7 @@ struct rdt_resource {
> > enum resctrl_schema_fmt schema_fmt;
> > unsigned int mbm_cfg_mask;
> > bool cdp_capable;
> > + struct dentry *arch_debug_info;
> > };
>
> ok ... but maybe not quite exactly (more below)
Would have been useful with the "always create directories" approach.
As you point out below the name is problematic. Would need separate
entries for control and monitor resources like RDT_RESOURCE_L3.
I don't think it is useful in the "only make directories when requested
by architecture" mode.
> >
> > /*
> > diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> > index ed4fc45da346..48c587201fb6 100644
> > --- a/fs/resctrl/rdtgroup.c
> > +++ b/fs/resctrl/rdtgroup.c
> > @@ -4274,6 +4274,8 @@ void resctrl_offline_cpu(unsigned int cpu)
> > */
> > int resctrl_init(void)
> > {
> > + struct dentry *debuginfodir;
> > + struct rdt_resource *r;
> > int ret = 0;
> >
> > seq_buf_init(&last_cmd_status, last_cmd_status_buf,
> > @@ -4320,6 +4322,12 @@ int resctrl_init(void)
> > */
> > debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
> >
> > + /* Create debug info directories for each resource */
> > + debuginfodir = debugfs_create_dir("info", debugfs_resctrl);
> > +
> > + for_each_rdt_resource(r)
> > + r->arch_debug_info = debugfs_create_dir(r->name, debuginfodir);
>
> This ignores (*) several of the boundaries my response aimed to establish.
>
> Here are some red flags:
> - This creates the resource named directory and hands off that pointer to the
> arch. As I mentioned the arch should not have control over resctrl's debugfs.
> I believe this is the type of information that should be in control of resctrl fs
> since, as I mentioned, resctrl fs may need to add debugging that mirrors /sys/fs/resctrl.
> - Blindly creating these directories (a) without the resource even existing on the
> system, and (b) without being used/requested by the architecture does not create a good
> interface in my opinion. User space will see a bunch of empty directories
> associated with resources that are not present on the system.
> - The directories created do not even match /sys/fs/resctrl/info when it comes
> to the resources. Note that the directories within /sys/fs/resctrl/info are created
> from the schema for control resources and appends _MON to monitor resources. Like
> I mentioned in my earlier response there should ideally be space for a future
> resctrl fs extension to mirror layout of /sys/fs/resctrl for resctrl fs debug
> in debugfs. This solution ignores all of that.
>
> I still think that the architecture should request the debugfs directory from resctrl fs.
> This avoids resctrl fs needing to create directories/files that are never used and
> does not present user space with an empty tree. Considering that the new PERF_PKG
> resource may not come online until resctrl mount this should be something that can be
> called at any time.
>
> One possibility, that supports intended use while keeping the door open to support
> future resctrl fs use of the debugfs, could be a new resctrl fs function,
> for example resctrl_create_mon_resource_debugfs(struct rdt_resource *r), that will initialize
> rdt_resource::arch_debug_info(*) to point to the dentry of newly created
> /sys/kernel/debug/resctrl/info/<rdt_resource::name>_MON/arch_debug_name_TBD *if*
> the associated resource is capable of monitoring ... or do you think an architecture
> may want to add debugging information before a resource is discovered/enabled?
> If doing this then rdt_resource::arch_debug_info is no longer appropriate since it needs
> to be specific to the monitoring resource. Perhaps then rdt_resource::arch_mon_debugfs
> that would eventually live in [1]?
>
> This is feeling rushed and I am sharing some top of mind ideas. I will give this
> more thought.
>
> Reinette
>
> [1] https://lore.kernel.org/lkml/cb8425c73f57280b0b4f22e089b2912eede42f7a.1747349530.git.babu.moger@amd.com/
>
> (*) I have now asked several times to stop ignoring feedback. This should not even
> be necessary in the first place. I do not require you to agree with me and I do not claim
> to always be right, please just stop ignoring feedback. The way forward I plan to ignore
> messages that ignores feedback.
So here's a second PoC. Takes into account all of the points you make
above with the following adjustments:
1) Not adding the rdt_resource::arch_mon_debugfs field. Just returning
the "struct dentry *" looks to be adequate for existing use case.
Having the pointer in "struct resource" would be useful if some future
use case needed to access the debugfs locations from calls to
architecture code that pass in the rdt_resource pointer. Could be
added if ever needed.
2) I can't envision a need for debugfs entries for resources
pre-discovery, or when not enabled. So keep things simple for
now.
3) I think the function name resctrl_debugfs_mon_info_mkdir() is a bit
more descriptive (it is making a directory and we usually have such
functions include "mkdir" in the name).
-Tony
---
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 8bec8f766b01..771e69c0c5c1 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -564,6 +564,12 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r);
extern unsigned int resctrl_rmid_realloc_threshold;
extern unsigned int resctrl_rmid_realloc_limit;
+/**
+ * resctrl_debugfs_mon_info_mkdir() - Create a debugfs info directory.
+ * @r: Resource (must be mon_capable).
+ */
+struct dentry *resctrl_debugfs_mon_info_mkdir(struct rdt_resource *r);
+
int resctrl_init(void);
void resctrl_exit(void);
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index 8d094a3acf2f..0f11b8d0ce0b 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -4344,6 +4344,22 @@ int resctrl_init(void)
return ret;
}
+struct dentry *resctrl_debugfs_mon_info_mkdir(struct rdt_resource *r)
+{
+ static struct dentry *debugfs_resctrl_info;
+ char name[32];
+
+ if (!r->mon_capable)
+ return NULL;
+
+ if (!debugfs_resctrl_info)
+ debugfs_resctrl_info = debugfs_create_dir("info", debugfs_resctrl);
+
+ sprintf(name, "%s_MON", r->name);
+
+ return debugfs_create_dir(name, debugfs_resctrl_info);
+}
+
static bool resctrl_online_domains_exist(void)
{
struct rdt_resource *r;
--
2.49.0
Tony,
On 6/9/25 11:49 AM, Luck, Tony wrote:
> On Fri, Jun 06, 2025 at 02:14:56PM -0700, Reinette Chatre wrote:
>> Hi Tony,
>>
>> On 6/6/25 10:30 AM, Luck, Tony wrote:
>>> On Fri, Jun 06, 2025 at 09:26:06AM -0700, Reinette Chatre wrote:
>>>> With /sys/kernel/debug/resctrl potentially mirroring /sys/fs/resctrl to
>>>> support various debugging scenarios there may later be resource level
>>>> debugging for which a "/sys/kernel/debug/resctrl/info/<resource>/<debugfile>" can
>>>> be used. Considering this it looks to me as though one possible boundary could
>>>> be to isolate arch specific debug to, for example, a new directory named
>>>> "/sys/kernel/debug/resctrl/info/arch_debug_name_tbd/". By placing the
>>>> arch debug in a sub-directory named "info" it avoids collision with resource
>>>> group names with naming that also avoids collision with resource names since
>>>> all these names are controlled by resctrl fs.
>>>
>>>
>>> That seems like a good path. PoC patch below. Note that I put the dentry
>>> for the debug info directory into struct rdt_resource. So no call from
>>> architecture to file system code needed to access.
>>
>> ok, reading between the lines there is now a switch to per-resource
>> requirement, which fits with the use.
>>
>>>
>>> Directory layout looks like this:
>>>
>>> # tree /sys/kernel/debug/resctrl/
>>> /sys/kernel/debug/resctrl/
>>> └── info
>>> ├── L2
>>> ├── L3
>>> ├── MB
>>> └── SMBA
>>>
>>
>> This looks like something that needs to be owned and managed by
>> resctrl fs (more below).
>>
>>> 6 directories, 0 files
>>>
>>> -Tony
>>>
>>> ---
>>>
>>> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
>>> index 5e28e81b35f6..78dd0f8f7ad8 100644
>>> --- a/include/linux/resctrl.h
>>> +++ b/include/linux/resctrl.h
>>> @@ -281,6 +281,7 @@ enum resctrl_schema_fmt {
>>> * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth
>>> * monitoring events can be configured.
>>> * @cdp_capable: Is the CDP feature available on this resource
>>> + * @arch_debug_info: Debugfs info directory for architecture use
>>> */
>>> struct rdt_resource {
>>> int rid;
>>> @@ -297,6 +298,7 @@ struct rdt_resource {
>>> enum resctrl_schema_fmt schema_fmt;
>>> unsigned int mbm_cfg_mask;
>>> bool cdp_capable;
>>> + struct dentry *arch_debug_info;
>>> };
>>
>> ok ... but maybe not quite exactly (more below)
>
> Would have been useful with the "always create directories" approach.
> As you point out below the name is problematic. Would need separate
> entries for control and monitor resources like RDT_RESOURCE_L3.
>
> I don't think it is useful in the "only make directories when requested
> by architecture" mode.
>
>>>
>>> /*
>>> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
>>> index ed4fc45da346..48c587201fb6 100644
>>> --- a/fs/resctrl/rdtgroup.c
>>> +++ b/fs/resctrl/rdtgroup.c
>>> @@ -4274,6 +4274,8 @@ void resctrl_offline_cpu(unsigned int cpu)
>>> */
>>> int resctrl_init(void)
>>> {
>>> + struct dentry *debuginfodir;
>>> + struct rdt_resource *r;
>>> int ret = 0;
>>>
>>> seq_buf_init(&last_cmd_status, last_cmd_status_buf,
>>> @@ -4320,6 +4322,12 @@ int resctrl_init(void)
>>> */
>>> debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
>>>
>>> + /* Create debug info directories for each resource */
>>> + debuginfodir = debugfs_create_dir("info", debugfs_resctrl);
>>> +
>>> + for_each_rdt_resource(r)
>>> + r->arch_debug_info = debugfs_create_dir(r->name, debuginfodir);
>>
>> This ignores (*) several of the boundaries my response aimed to establish.
>>
>> Here are some red flags:
>> - This creates the resource named directory and hands off that pointer to the
>> arch. As I mentioned the arch should not have control over resctrl's debugfs.
>> I believe this is the type of information that should be in control of resctrl fs
>> since, as I mentioned, resctrl fs may need to add debugging that mirrors /sys/fs/resctrl.
>> - Blindly creating these directories (a) without the resource even existing on the
>> system, and (b) without being used/requested by the architecture does not create a good
>> interface in my opinion. User space will see a bunch of empty directories
>> associated with resources that are not present on the system.
>> - The directories created do not even match /sys/fs/resctrl/info when it comes
>> to the resources. Note that the directories within /sys/fs/resctrl/info are created
>> from the schema for control resources and appends _MON to monitor resources. Like
>> I mentioned in my earlier response there should ideally be space for a future
>> resctrl fs extension to mirror layout of /sys/fs/resctrl for resctrl fs debug
>> in debugfs. This solution ignores all of that.
>>
>> I still think that the architecture should request the debugfs directory from resctrl fs.
>> This avoids resctrl fs needing to create directories/files that are never used and
>> does not present user space with an empty tree. Considering that the new PERF_PKG
>> resource may not come online until resctrl mount this should be something that can be
>> called at any time.
>>
>> One possibility, that supports intended use while keeping the door open to support
>> future resctrl fs use of the debugfs, could be a new resctrl fs function,
>> for example resctrl_create_mon_resource_debugfs(struct rdt_resource *r), that will initialize
>> rdt_resource::arch_debug_info(*) to point to the dentry of newly created
>> /sys/kernel/debug/resctrl/info/<rdt_resource::name>_MON/arch_debug_name_TBD *if*
>> the associated resource is capable of monitoring ... or do you think an architecture
>> may want to add debugging information before a resource is discovered/enabled?
>> If doing this then rdt_resource::arch_debug_info is no longer appropriate since it needs
>> to be specific to the monitoring resource. Perhaps then rdt_resource::arch_mon_debugfs
>> that would eventually live in [1]?
>>
>> This is feeling rushed and I am sharing some top of mind ideas. I will give this
>> more thought.
>>
>> Reinette
>>
>> [1] https://lore.kernel.org/lkml/cb8425c73f57280b0b4f22e089b2912eede42f7a.1747349530.git.babu.moger@amd.com/
>>
>> (*) I have now asked several times to stop ignoring feedback. This should not even
>> be necessary in the first place. I do not require you to agree with me and I do not claim
>> to always be right, please just stop ignoring feedback. The way forward I plan to ignore
>> messages that ignores feedback.
>
> So here's a second PoC. Takes into account all of the points you make
> above with the following adjustments:
>
> 1) Not adding the rdt_resource::arch_mon_debugfs field. Just returning
> the "struct dentry *" looks to be adequate for existing use case.
>
> Having the pointer in "struct resource" would be useful if some future
> use case needed to access the debugfs locations from calls to
> architecture code that pass in the rdt_resource pointer. Could be
> added if ever needed.
>
> 2) I can't envision a need for debugfs entries for resources
> pre-discovery, or when not enabled. So keep things simple for
> now.
>
> 3) I think the function name resctrl_debugfs_mon_info_mkdir() is a bit
> more descriptive (it is making a directory and we usually have such
> functions include "mkdir" in the name).
>
> -Tony
>
> ---
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 8bec8f766b01..771e69c0c5c1 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -564,6 +564,12 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r);
> extern unsigned int resctrl_rmid_realloc_threshold;
> extern unsigned int resctrl_rmid_realloc_limit;
>
> +/**
> + * resctrl_debugfs_mon_info_mkdir() - Create a debugfs info directory.
> + * @r: Resource (must be mon_capable).
> + */
> +struct dentry *resctrl_debugfs_mon_info_mkdir(struct rdt_resource *r);
> +
> int resctrl_init(void);
> void resctrl_exit(void);
>
> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> index 8d094a3acf2f..0f11b8d0ce0b 100644
> --- a/fs/resctrl/rdtgroup.c
> +++ b/fs/resctrl/rdtgroup.c
> @@ -4344,6 +4344,22 @@ int resctrl_init(void)
> return ret;
> }
>
> +struct dentry *resctrl_debugfs_mon_info_mkdir(struct rdt_resource *r)
> +{
> + static struct dentry *debugfs_resctrl_info;
> + char name[32];
> +
> + if (!r->mon_capable)
> + return NULL;
> +
> + if (!debugfs_resctrl_info)
> + debugfs_resctrl_info = debugfs_create_dir("info", debugfs_resctrl);
> +
> + sprintf(name, "%s_MON", r->name);
> +
> + return debugfs_create_dir(name, debugfs_resctrl_info);
> +}
> +
> static bool resctrl_online_domains_exist(void)
> {
> struct rdt_resource *r;
Why do you keep insisting without motivation on handing control of what
should be resctrl fs managed directories to architecture? Twice have I suggested
that an arch private directory be created for the arch debugfs and every
time you create a patch without motivation where arch gets control of what
should be resctrl fs managed. Again, if my suggestions are flawed it is an
opportunity for a teaching moment, never should be ignored. I highligted that
this is not ideal in the message you are responding to. I'm done.
Reinette
Reinette, Trimming to focus on why I was confused by your message. >> One possibility, that supports intended use while keeping the door open to support >> future resctrl fs use of the debugfs, could be a new resctrl fs function, >> for example resctrl_create_mon_resource_debugfs(struct rdt_resource *r), that will initialize >> rdt_resource::arch_debug_info(*) to point to the dentry of newly created >> /sys/kernel/debug/resctrl/info/<rdt_resource::name>_MON/arch_debug_name_TBD *if* >> the associated resource is capable of monitoring What exactly is this dentry pointing to? I was mistakenly of the impression that it was a directory. Now I think that you intend it to be a single file with a name chosen by filesystem code. Is that right? If so, there needs to be "umode_t mode" and "struct file_operations *fops" arguments for architecture to say whether this file is readable, writeable, and most importantly to specify the architecture functions to be called when the user accesses this file. With added "mode" and "fops" arguments this proposal meets my needs. Choosing the exact string for the "arch_debug_name_TBD" file name that will be given to any other users needs some thought. I was planning on simply "status" since the information that I want to convey is read-only status about each of the telemetry collection aggregators. But that feels like it might be limiting if a future use includes any control options by providing a writable file. -Tony
On 6/9/25 4:34 PM, Luck, Tony wrote: > Reinette, > > Trimming to focus on why I was confused by your message. > >>> One possibility, that supports intended use while keeping the door open to support >>> future resctrl fs use of the debugfs, could be a new resctrl fs function, >>> for example resctrl_create_mon_resource_debugfs(struct rdt_resource *r), that will initialize >>> rdt_resource::arch_debug_info(*) to point to the dentry of newly created >>> /sys/kernel/debug/resctrl/info/<rdt_resource::name>_MON/arch_debug_name_TBD *if* >>> the associated resource is capable of monitoring > > What exactly is this dentry pointing to? I was mistakenly of the impression that it was a directory. Yes, it has been directory since https://lore.kernel.org/lkml/9eb9a466-2895-405a-91f7-cda75e75f7ae@intel.com/ If your impression was indeed that it was a directory then why did your patch not create a directory? I am now going to repeat what I said in https://lore.kernel.org/lkml/9eb9a466-2895-405a-91f7-cda75e75f7ae@intel.com/ > > Now I think that you intend it to be a single file with a name chosen by filesystem code. > > Is that right? Not what I have been saying, no. > > If so, there needs to be "umode_t mode" and "struct file_operations *fops" arguments > for architecture to say whether this file is readable, writeable, and most importantly > to specify the architecture functions to be called when the user accesses this file. > > With added "mode" and "fops" arguments this proposal meets my needs. > > Choosing the exact string for the "arch_debug_name_TBD" file name that This should be a directory, a directory owned by the arch where it can create debug infrastructure required by arch. The directory name chosen and assigned by resctrl fs, while arch has freedom to create more directories and add files underneath it. Goal is to isolate all arch specific debug to a known location. Again, we need to prepare for resctrl fs to potentially use debugfs for its own debug and when it does this the expectation is that the layout will mirror /sys/fs/resctrl. Creating a directory /sys/kernel/debug/resctrl/info/<rdt_resource::name>_MON and then handing it off to the arch goes *against* this. It gives arch control over a directory that should be owned by resctrl fs. What I have been trying to propose is that resctrl fs create a directory /sys/kernel/debug/resctrl/info/<rdt_resource::name>_MON/arch_debug_name_TBD and hand a dentry pointer to it to the arch where it can do what is needed to support its debugging needs. Isn't this exactly what I wrote in the snippet above? Above you respond with statement that you were under impression that it was a directory ... and then send a patch that does something else. I am so confused. Gaslighting is beneath you. > will be given to any other users needs some thought. I was planning on > simply "status" since the information that I want to convey is read-only > status about each of the telemetry collection aggregators. But that feels > like it might be limiting if a future use includes any control options by > providing a writable file. The file containing the debug information for this feature will be added within the directory that we are talking about here. Reinette
On Mon, Jun 09, 2025 at 05:30:34PM -0700, Reinette Chatre wrote:
> This should be a directory, a directory owned by the arch where it can create
> debug infrastructure required by arch. The directory name chosen and
> assigned by resctrl fs, while arch has freedom to create more directories
> and add files underneath it. Goal is to isolate all arch specific debug to
> a known location.
>
> Again, we need to prepare for resctrl fs to potentially use debugfs for its own
> debug and when it does this the expectation is that the layout will mirror
> /sys/fs/resctrl. Creating a directory /sys/kernel/debug/resctrl/info/<rdt_resource::name>_MON
> and then handing it off to the arch goes *against* this. It gives arch
> control over a directory that should be owned by resctrl fs.
>
> What I have been trying to propose is that resctrl fs create a directory
> /sys/kernel/debug/resctrl/info/<rdt_resource::name>_MON/arch_debug_name_TBD and hand
> a dentry pointer to it to the arch where it can do what is needed to support its debugging needs.
> Isn't this exactly what I wrote in the snippet above? Above you respond with
> statement that you were under impression that it was a directory ... and then
> send a patch that does something else. I am so confused. Gaslighting is
> beneath you.
For the precise name of the "arch_debug_name_TBD" directory, is simply "arch"
sufficient? That leaves every other name available for resctrl
filesystem code free choice if it does add some debug files here.
Or would $ARCH ("x86" in my case) be better to keep distinct debug name
spaces between architectures?
-Tony
© 2016 - 2025 Red Hat, Inc.