Probing MPAM is convoluted. MSCs that are integrated with a CPU may
only be accessible from those CPUs, and they may not be online.
Touching the hardware early is pointless as MPAM can't be used until
the system-wide common values for num_partid and num_pmg have been
discovered.
Start with driver probe/remove and mapping the MSC.
CC: Carl Worth <carl@os.amperecomputing.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
Changes since RFC:
* Check for status=broken DT devices.
* Moved all the files around.
* Made Kconfig symbols depend on EXPERT
---
arch/arm64/Kconfig | 1 +
drivers/Kconfig | 2 +
drivers/Makefile | 1 +
drivers/resctrl/Kconfig | 11 ++
drivers/resctrl/Makefile | 4 +
drivers/resctrl/mpam_devices.c | 336 ++++++++++++++++++++++++++++++++
drivers/resctrl/mpam_internal.h | 62 ++++++
7 files changed, 417 insertions(+)
create mode 100644 drivers/resctrl/Kconfig
create mode 100644 drivers/resctrl/Makefile
create mode 100644 drivers/resctrl/mpam_devices.c
create mode 100644 drivers/resctrl/mpam_internal.h
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e51ccf1da102..ea3c54e04275 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2062,6 +2062,7 @@ config ARM64_TLB_RANGE
config ARM64_MPAM
bool "Enable support for MPAM"
+ select ARM64_MPAM_DRIVER
select ACPI_MPAM if ACPI
help
Memory Partitioning and Monitoring is an optional extension
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 4915a63866b0..3054b50a2f4c 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -251,4 +251,6 @@ source "drivers/hte/Kconfig"
source "drivers/cdx/Kconfig"
+source "drivers/resctrl/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index b5749cf67044..f41cf4eddeba 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -194,5 +194,6 @@ obj-$(CONFIG_HTE) += hte/
obj-$(CONFIG_DRM_ACCEL) += accel/
obj-$(CONFIG_CDX_BUS) += cdx/
obj-$(CONFIG_DPLL) += dpll/
+obj-y += resctrl/
obj-$(CONFIG_S390) += s390/
diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig
new file mode 100644
index 000000000000..dff7b87280ab
--- /dev/null
+++ b/drivers/resctrl/Kconfig
@@ -0,0 +1,11 @@
+# Confusingly, this is everything but the CPU bits of MPAM. CPU here means
+# CPU resources, not containers or cgroups etc.
+config ARM64_MPAM_DRIVER
+ bool "MPAM driver for System IP, e,g. caches and memory controllers"
+ depends on ARM64_MPAM && EXPERT
+
+config ARM64_MPAM_DRIVER_DEBUG
+ bool "Enable debug messages from the MPAM driver."
+ depends on ARM64_MPAM_DRIVER
+ help
+ Say yes here to enable debug messages from the MPAM driver.
diff --git a/drivers/resctrl/Makefile b/drivers/resctrl/Makefile
new file mode 100644
index 000000000000..92b48fa20108
--- /dev/null
+++ b/drivers/resctrl/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_ARM64_MPAM_DRIVER) += mpam.o
+mpam-y += mpam_devices.o
+
+cflags-$(CONFIG_ARM64_MPAM_DRIVER_DEBUG) += -DDEBUG
diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
new file mode 100644
index 000000000000..a0d9a699a6e7
--- /dev/null
+++ b/drivers/resctrl/mpam_devices.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2025 Arm Ltd.
+
+#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
+
+#include <linux/acpi.h>
+#include <linux/arm_mpam.h>
+#include <linux/cacheinfo.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/srcu.h>
+#include <linux/types.h>
+
+#include <acpi/pcc.h>
+
+#include "mpam_internal.h"
+
+/*
+ * mpam_list_lock protects the SRCU lists when writing. Once the
+ * mpam_enabled key is enabled these lists are read-only,
+ * unless the error interrupt disables the driver.
+ */
+static DEFINE_MUTEX(mpam_list_lock);
+static LIST_HEAD(mpam_all_msc);
+
+static struct srcu_struct mpam_srcu;
+
+/* MPAM isn't available until all the MSC have been probed. */
+static u32 mpam_num_msc;
+
+static void mpam_discovery_complete(void)
+{
+ pr_err("Discovered all MSC\n");
+}
+
+static int mpam_dt_count_msc(void)
+{
+ int count = 0;
+ struct device_node *np;
+
+ for_each_compatible_node(np, NULL, "arm,mpam-msc") {
+ if (of_device_is_available(np))
+ count++;
+ }
+
+ return count;
+}
+
+static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np,
+ u32 ris_idx)
+{
+ int err = 0;
+ u32 level = 0;
+ unsigned long cache_id;
+ struct device_node *cache;
+
+ do {
+ if (of_device_is_compatible(np, "arm,mpam-cache")) {
+ cache = of_parse_phandle(np, "arm,mpam-device", 0);
+ if (!cache) {
+ pr_err("Failed to read phandle\n");
+ break;
+ }
+ } else if (of_device_is_compatible(np->parent, "cache")) {
+ cache = of_node_get(np->parent);
+ } else {
+ /* For now, only caches are supported */
+ cache = NULL;
+ break;
+ }
+
+ err = of_property_read_u32(cache, "cache-level", &level);
+ if (err) {
+ pr_err("Failed to read cache-level\n");
+ break;
+ }
+
+ cache_id = cache_of_calculate_id(cache);
+ if (cache_id == ~0UL) {
+ err = -ENOENT;
+ break;
+ }
+
+ err = mpam_ris_create(msc, ris_idx, MPAM_CLASS_CACHE, level,
+ cache_id);
+ } while (0);
+ of_node_put(cache);
+
+ return err;
+}
+
+static int mpam_dt_parse_resources(struct mpam_msc *msc, void *ignored)
+{
+ int err, num_ris = 0;
+ const u32 *ris_idx_p;
+ struct device_node *iter, *np;
+
+ np = msc->pdev->dev.of_node;
+ for_each_child_of_node(np, iter) {
+ ris_idx_p = of_get_property(iter, "reg", NULL);
+ if (ris_idx_p) {
+ num_ris++;
+ err = mpam_dt_parse_resource(msc, iter, *ris_idx_p);
+ if (err) {
+ of_node_put(iter);
+ return err;
+ }
+ }
+ }
+
+ if (!num_ris)
+ mpam_dt_parse_resource(msc, np, 0);
+
+ return err;
+}
+
+/*
+ * An MSC can control traffic from a set of CPUs, but may only be accessible
+ * from a (hopefully wider) set of CPUs. The common reason for this is power
+ * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the
+ * the corresponding cache may also be powered off. By making accesses from
+ * one of those CPUs, we ensure this isn't the case.
+ */
+static int update_msc_accessibility(struct mpam_msc *msc)
+{
+ struct device_node *parent;
+ u32 affinity_id;
+ int err;
+
+ if (!acpi_disabled) {
+ err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity",
+ &affinity_id);
+ if (err)
+ cpumask_copy(&msc->accessibility, cpu_possible_mask);
+ else
+ acpi_pptt_get_cpus_from_container(affinity_id,
+ &msc->accessibility);
+
+ return 0;
+ }
+
+ /* This depends on the path to of_node */
+ parent = of_get_parent(msc->pdev->dev.of_node);
+ if (parent == of_root) {
+ cpumask_copy(&msc->accessibility, cpu_possible_mask);
+ err = 0;
+ } else {
+ err = -EINVAL;
+ pr_err("Cannot determine accessibility of MSC: %s\n",
+ dev_name(&msc->pdev->dev));
+ }
+ of_node_put(parent);
+
+ return err;
+}
+
+static int fw_num_msc;
+
+static void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg)
+{
+ /* TODO: wake up tasks blocked on this MSC's PCC channel */
+}
+
+static void mpam_msc_drv_remove(struct platform_device *pdev)
+{
+ struct mpam_msc *msc = platform_get_drvdata(pdev);
+
+ if (!msc)
+ return;
+
+ mutex_lock(&mpam_list_lock);
+ mpam_num_msc--;
+ platform_set_drvdata(pdev, NULL);
+ list_del_rcu(&msc->glbl_list);
+ synchronize_srcu(&mpam_srcu);
+ devm_kfree(&pdev->dev, msc);
+ mutex_unlock(&mpam_list_lock);
+}
+
+static int mpam_msc_drv_probe(struct platform_device *pdev)
+{
+ int err;
+ struct mpam_msc *msc;
+ struct resource *msc_res;
+ void *plat_data = pdev->dev.platform_data;
+
+ mutex_lock(&mpam_list_lock);
+ do {
+ msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL);
+ if (!msc) {
+ err = -ENOMEM;
+ break;
+ }
+
+ mutex_init(&msc->probe_lock);
+ mutex_init(&msc->part_sel_lock);
+ mutex_init(&msc->outer_mon_sel_lock);
+ raw_spin_lock_init(&msc->inner_mon_sel_lock);
+ msc->id = mpam_num_msc++;
+ msc->pdev = pdev;
+ INIT_LIST_HEAD_RCU(&msc->glbl_list);
+ INIT_LIST_HEAD_RCU(&msc->ris);
+
+ err = update_msc_accessibility(msc);
+ if (err)
+ break;
+ if (cpumask_empty(&msc->accessibility)) {
+ pr_err_once("msc:%u is not accessible from any CPU!",
+ msc->id);
+ err = -EINVAL;
+ break;
+ }
+
+ if (device_property_read_u32(&pdev->dev, "pcc-channel",
+ &msc->pcc_subspace_id))
+ msc->iface = MPAM_IFACE_MMIO;
+ else
+ msc->iface = MPAM_IFACE_PCC;
+
+ if (msc->iface == MPAM_IFACE_MMIO) {
+ void __iomem *io;
+
+ io = devm_platform_get_and_ioremap_resource(pdev, 0,
+ &msc_res);
+ if (IS_ERR(io)) {
+ pr_err("Failed to map MSC base address\n");
+ err = PTR_ERR(io);
+ break;
+ }
+ msc->mapped_hwpage_sz = msc_res->end - msc_res->start;
+ msc->mapped_hwpage = io;
+ } else if (msc->iface == MPAM_IFACE_PCC) {
+ msc->pcc_cl.dev = &pdev->dev;
+ msc->pcc_cl.rx_callback = mpam_pcc_rx_callback;
+ msc->pcc_cl.tx_block = false;
+ msc->pcc_cl.tx_tout = 1000; /* 1s */
+ msc->pcc_cl.knows_txdone = false;
+
+ msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl,
+ msc->pcc_subspace_id);
+ if (IS_ERR(msc->pcc_chan)) {
+ pr_err("Failed to request MSC PCC channel\n");
+ err = PTR_ERR(msc->pcc_chan);
+ break;
+ }
+ }
+
+ list_add_rcu(&msc->glbl_list, &mpam_all_msc);
+ platform_set_drvdata(pdev, msc);
+ } while (0);
+ mutex_unlock(&mpam_list_lock);
+
+ if (!err) {
+ /* Create RIS entries described by firmware */
+ if (!acpi_disabled)
+ err = acpi_mpam_parse_resources(msc, plat_data);
+ else
+ err = mpam_dt_parse_resources(msc, plat_data);
+ }
+
+ if (!err && fw_num_msc == mpam_num_msc)
+ mpam_discovery_complete();
+
+ if (err && msc)
+ mpam_msc_drv_remove(pdev);
+
+ return err;
+}
+
+static const struct of_device_id mpam_of_match[] = {
+ { .compatible = "arm,mpam-msc", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, mpam_of_match);
+
+static struct platform_driver mpam_msc_driver = {
+ .driver = {
+ .name = "mpam_msc",
+ .of_match_table = of_match_ptr(mpam_of_match),
+ },
+ .probe = mpam_msc_drv_probe,
+ .remove = mpam_msc_drv_remove,
+};
+
+/*
+ * MSC that are hidden under caches are not created as platform devices
+ * as there is no cache driver. Caches are also special-cased in
+ * update_msc_accessibility().
+ */
+static void mpam_dt_create_foundling_msc(void)
+{
+ int err;
+ struct device_node *cache;
+
+ for_each_compatible_node(cache, NULL, "cache") {
+ err = of_platform_populate(cache, mpam_of_match, NULL, NULL);
+ if (err)
+ pr_err("Failed to create MSC devices under caches\n");
+ }
+}
+
+static int __init mpam_msc_driver_init(void)
+{
+ if (!system_supports_mpam())
+ return -EOPNOTSUPP;
+
+ init_srcu_struct(&mpam_srcu);
+
+ if (!acpi_disabled)
+ fw_num_msc = acpi_mpam_count_msc();
+ else
+ fw_num_msc = mpam_dt_count_msc();
+
+ if (fw_num_msc <= 0) {
+ pr_err("No MSC devices found in firmware\n");
+ return -EINVAL;
+ }
+
+ if (acpi_disabled)
+ mpam_dt_create_foundling_msc();
+
+ return platform_driver_register(&mpam_msc_driver);
+}
+subsys_initcall(mpam_msc_driver_init);
diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
new file mode 100644
index 000000000000..07e0f240eaca
--- /dev/null
+++ b/drivers/resctrl/mpam_internal.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2024 Arm Ltd.
+
+#ifndef MPAM_INTERNAL_H
+#define MPAM_INTERNAL_H
+
+#include <linux/arm_mpam.h>
+#include <linux/cpumask.h>
+#include <linux/io.h>
+#include <linux/mailbox_client.h>
+#include <linux/mutex.h>
+#include <linux/resctrl.h>
+#include <linux/sizes.h>
+
+struct mpam_msc {
+ /* member of mpam_all_msc */
+ struct list_head glbl_list;
+
+ int id;
+ struct platform_device *pdev;
+
+ /* Not modified after mpam_is_enabled() becomes true */
+ enum mpam_msc_iface iface;
+ u32 pcc_subspace_id;
+ struct mbox_client pcc_cl;
+ struct pcc_mbox_chan *pcc_chan;
+ u32 nrdy_usec;
+ cpumask_t accessibility;
+
+ /*
+ * probe_lock is only take during discovery. After discovery these
+ * properties become read-only and the lists are protected by SRCU.
+ */
+ struct mutex probe_lock;
+ unsigned long ris_idxs[128 / BITS_PER_LONG];
+ u32 ris_max;
+
+ /* mpam_msc_ris of this component */
+ struct list_head ris;
+
+ /*
+ * part_sel_lock protects access to the MSC hardware registers that are
+ * affected by MPAMCFG_PART_SEL. (including the ID registers that vary
+ * by RIS).
+ * If needed, take msc->lock first.
+ */
+ struct mutex part_sel_lock;
+
+ /*
+ * mon_sel_lock protects access to the MSC hardware registers that are
+ * affeted by MPAMCFG_MON_SEL.
+ * If needed, take msc->lock first.
+ */
+ struct mutex outer_mon_sel_lock;
+ raw_spinlock_t inner_mon_sel_lock;
+ unsigned long inner_mon_sel_flags;
+
+ void __iomem *mapped_hwpage;
+ size_t mapped_hwpage_sz;
+};
+
+#endif /* MPAM_INTERNAL_H */
--
2.20.1
Hi James, On Fri, Aug 22, 2025 at 03:29:51PM +0000, James Morse wrote: > Probing MPAM is convoluted. MSCs that are integrated with a CPU may > only be accessible from those CPUs, and they may not be online. > Touching the hardware early is pointless as MPAM can't be used until > the system-wide common values for num_partid and num_pmg have been > discovered. > > Start with driver probe/remove and mapping the MSC. > > CC: Carl Worth <carl@os.amperecomputing.com> > Signed-off-by: James Morse <james.morse@arm.com> > --- > Changes since RFC: > * Check for status=broken DT devices. > * Moved all the files around. > * Made Kconfig symbols depend on EXPERT > --- [...] > diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig > new file mode 100644 > index 000000000000..dff7b87280ab > --- /dev/null > +++ b/drivers/resctrl/Kconfig > @@ -0,0 +1,11 @@ > +# Confusingly, this is everything but the CPU bits of MPAM. CPU here means > +# CPU resources, not containers or cgroups etc. Drop confusing comment? CPUs are not mentioned other than in the comment -- I think the descriptions are sufficiently self-explanatory that they don't read onto CPUs. > +config ARM64_MPAM_DRIVER > + bool "MPAM driver for System IP, e,g. caches and memory controllers" > + depends on ARM64_MPAM && EXPERT > + > +config ARM64_MPAM_DRIVER_DEBUG > + bool "Enable debug messages from the MPAM driver." Nit: spurious full stop. (i.e., people don't add one in these one-line descriptions. They are title-like and self-delimiting, even when the text is a valid sentence.) > + depends on ARM64_MPAM_DRIVER > + help > + Say yes here to enable debug messages from the MPAM driver. > diff --git a/drivers/resctrl/Makefile b/drivers/resctrl/Makefile > new file mode 100644 > index 000000000000..92b48fa20108 > --- /dev/null > +++ b/drivers/resctrl/Makefile > @@ -0,0 +1,4 @@ > +obj-$(CONFIG_ARM64_MPAM_DRIVER) += mpam.o > +mpam-y += mpam_devices.o > + > +cflags-$(CONFIG_ARM64_MPAM_DRIVER_DEBUG) += -DDEBUG > diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c > new file mode 100644 > index 000000000000..a0d9a699a6e7 > --- /dev/null > +++ b/drivers/resctrl/mpam_devices.c > @@ -0,0 +1,336 @@ > +// SPDX-License-Identifier: GPL-2.0 > +// Copyright (C) 2025 Arm Ltd. > + > +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ > + > +#include <linux/acpi.h> > +#include <linux/arm_mpam.h> > +#include <linux/cacheinfo.h> > +#include <linux/cpu.h> > +#include <linux/cpumask.h> > +#include <linux/device.h> > +#include <linux/errno.h> > +#include <linux/gfp.h> > +#include <linux/list.h> > +#include <linux/lockdep.h> > +#include <linux/mutex.h> > +#include <linux/of.h> > +#include <linux/of_platform.h> > +#include <linux/platform_device.h> > +#include <linux/printk.h> > +#include <linux/slab.h> > +#include <linux/spinlock.h> > +#include <linux/srcu.h> > +#include <linux/types.h> > + > +#include <acpi/pcc.h> > + > +#include "mpam_internal.h" > + > +/* > + * mpam_list_lock protects the SRCU lists when writing. Once the > + * mpam_enabled key is enabled these lists are read-only, > + * unless the error interrupt disables the driver. > + */ > +static DEFINE_MUTEX(mpam_list_lock); > +static LIST_HEAD(mpam_all_msc); > + > +static struct srcu_struct mpam_srcu; > + > +/* MPAM isn't available until all the MSC have been probed. */ Comment doesn't really explain the variable. Maybe something like "Number of MSCs that need to be probed for MPAM to be usable" ? > +static u32 mpam_num_msc; Any particular reason this is u32 and not unsigned int? How are accesses to this protected against data races? If there are supposed to be locks to protect globals in the MPAM driver, is it worth wrapping them in access functions with a lockdep assert? Otherwise, it feels rather easy to get this wrong -- I think I've found at least one bug (see mpam_msc_drv_probe().) > + > +static void mpam_discovery_complete(void) > +{ > + pr_err("Discovered all MSC\n"); > +} As others have commented, if this is non-functional code that gets removed later on, it's probably best to drop this up-front? [...] > +static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np, > + u32 ris_idx) > +{ > + int err = 0; > + u32 level = 0; > + unsigned long cache_id; > + struct device_node *cache; > + > + do { > + if (of_device_is_compatible(np, "arm,mpam-cache")) { > + cache = of_parse_phandle(np, "arm,mpam-device", 0); > + if (!cache) { > + pr_err("Failed to read phandle\n"); > + break; > + } > + } else if (of_device_is_compatible(np->parent, "cache")) { > + cache = of_node_get(np->parent); > + } else { > + /* For now, only caches are supported */ > + cache = NULL; > + break; > + } > + > + err = of_property_read_u32(cache, "cache-level", &level); > + if (err) { > + pr_err("Failed to read cache-level\n"); > + break; > + } > + > + cache_id = cache_of_calculate_id(cache); > + if (cache_id == ~0UL) { The type of cache_id may change if the return type of cache_of_calculate_id() changes (see comments on patch 1). Possible #define for the exceptional value. > + err = -ENOENT; > + break; The lack of a diagnostic here is inconsistent with the level of diagnostics in the rest of the loop. > + } > + > + err = mpam_ris_create(msc, ris_idx, MPAM_CLASS_CACHE, level, > + cache_id); > + } while (0); Abuse of do ... while () here? There is no loop. The breaks are stealth "goto"s to this statement: > + of_node_put(cache); (It works either way, but maybe gotos to an explicit label would be more readable, as well as avoiding an unnecessary level of indentation.) > + > + return err; > +} [...] > +/* > + * An MSC can control traffic from a set of CPUs, but may only be accessible > + * from a (hopefully wider) set of CPUs. The common reason for this is power > + * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the > + * the corresponding cache may also be powered off. By making accesses from Nit: the the > + * one of those CPUs, we ensure this isn't the case. > + */ > +static int update_msc_accessibility(struct mpam_msc *msc) > +{ > + struct device_node *parent; > + u32 affinity_id; > + int err; > + > + if (!acpi_disabled) { > + err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity", > + &affinity_id); > + if (err) > + cpumask_copy(&msc->accessibility, cpu_possible_mask); > + else > + acpi_pptt_get_cpus_from_container(affinity_id, > + &msc->accessibility); > + > + return 0; > + } > + > + /* This depends on the path to of_node */ > + parent = of_get_parent(msc->pdev->dev.of_node); > + if (parent == of_root) { > + cpumask_copy(&msc->accessibility, cpu_possible_mask); > + err = 0; > + } else { > + err = -EINVAL; > + pr_err("Cannot determine accessibility of MSC: %s\n", > + dev_name(&msc->pdev->dev)); > + } > + of_node_put(parent); > + > + return err; > +} > + > +static int fw_num_msc; Does this need to be protected against data races? If individual mpam_msc_drv_probe() calls may execute on different CPUs from mpam_msc_driver_init(), then seem to be potential races here. > + > +static void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg) > +{ > + /* TODO: wake up tasks blocked on this MSC's PCC channel */ So, is this broken in this commit? (If the series does not get broken up or applied piecemail, that's not such a concern, though.) > +} > + > +static void mpam_msc_drv_remove(struct platform_device *pdev) > +{ The MPAM driver cannot currently be built as a module. Is it possible to exercise the driver remove paths, today? > + struct mpam_msc *msc = platform_get_drvdata(pdev); > + > + if (!msc) > + return; > + > + mutex_lock(&mpam_list_lock); > + mpam_num_msc--; > + platform_set_drvdata(pdev, NULL); > + list_del_rcu(&msc->glbl_list); > + synchronize_srcu(&mpam_srcu); > + devm_kfree(&pdev->dev, msc); > + mutex_unlock(&mpam_list_lock); > +} > + > +static int mpam_msc_drv_probe(struct platform_device *pdev) > +{ > + int err; > + struct mpam_msc *msc; > + struct resource *msc_res; > + void *plat_data = pdev->dev.platform_data; > + > + mutex_lock(&mpam_list_lock); > + do { > + msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); > + if (!msc) { > + err = -ENOMEM; > + break; > + } > + > + mutex_init(&msc->probe_lock); > + mutex_init(&msc->part_sel_lock); > + mutex_init(&msc->outer_mon_sel_lock); > + raw_spin_lock_init(&msc->inner_mon_sel_lock); > + msc->id = mpam_num_msc++; > + msc->pdev = pdev; > + INIT_LIST_HEAD_RCU(&msc->glbl_list); > + INIT_LIST_HEAD_RCU(&msc->ris); > + > + err = update_msc_accessibility(msc); > + if (err) > + break; > + if (cpumask_empty(&msc->accessibility)) { > + pr_err_once("msc:%u is not accessible from any CPU!", > + msc->id); > + err = -EINVAL; > + break; > + } > + > + if (device_property_read_u32(&pdev->dev, "pcc-channel", > + &msc->pcc_subspace_id)) > + msc->iface = MPAM_IFACE_MMIO; > + else > + msc->iface = MPAM_IFACE_PCC; > + > + if (msc->iface == MPAM_IFACE_MMIO) { > + void __iomem *io; > + > + io = devm_platform_get_and_ioremap_resource(pdev, 0, > + &msc_res); > + if (IS_ERR(io)) { > + pr_err("Failed to map MSC base address\n"); > + err = PTR_ERR(io); > + break; > + } > + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; > + msc->mapped_hwpage = io; > + } else if (msc->iface == MPAM_IFACE_PCC) { > + msc->pcc_cl.dev = &pdev->dev; > + msc->pcc_cl.rx_callback = mpam_pcc_rx_callback; > + msc->pcc_cl.tx_block = false; > + msc->pcc_cl.tx_tout = 1000; /* 1s */ > + msc->pcc_cl.knows_txdone = false; > + > + msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl, > + msc->pcc_subspace_id); > + if (IS_ERR(msc->pcc_chan)) { > + pr_err("Failed to request MSC PCC channel\n"); > + err = PTR_ERR(msc->pcc_chan); > + break; > + } > + } Should the lock be held across initialisation of the msc fields? list_add_rcu() might imply sufficient barriers to ensure that the initialisations are visible to other threads that obtain the msc pointer by iterating over mpam_all_msc. It's probably cleaner to hold the lock explicitly, though. What other ways of obtaining the msc pointer exist? > + > + list_add_rcu(&msc->glbl_list, &mpam_all_msc); > + platform_set_drvdata(pdev, msc); > + } while (0); > + mutex_unlock(&mpam_list_lock); > + > + if (!err) { > + /* Create RIS entries described by firmware */ > + if (!acpi_disabled) > + err = acpi_mpam_parse_resources(msc, plat_data); > + else > + err = mpam_dt_parse_resources(msc, plat_data); > + } > + > + if (!err && fw_num_msc == mpam_num_msc) Unlocked read of mpam_num_msc? > + mpam_discovery_complete(); > + > + if (err && msc) > + mpam_msc_drv_remove(pdev); > + > + return err; > +} > + > +static const struct of_device_id mpam_of_match[] = { > + { .compatible = "arm,mpam-msc", }, > + {}, > +}; > +MODULE_DEVICE_TABLE(of, mpam_of_match); > + > +static struct platform_driver mpam_msc_driver = { > + .driver = { > + .name = "mpam_msc", > + .of_match_table = of_match_ptr(mpam_of_match), > + }, > + .probe = mpam_msc_drv_probe, > + .remove = mpam_msc_drv_remove, > +}; > + > +/* > + * MSC that are hidden under caches are not created as platform devices > + * as there is no cache driver. Caches are also special-cased in > + * update_msc_accessibility(). > + */ Can you elaborate? I don't understand quite what this is doing. > +static void mpam_dt_create_foundling_msc(void) > +{ > + int err; > + struct device_node *cache; > + > + for_each_compatible_node(cache, NULL, "cache") { > + err = of_platform_populate(cache, mpam_of_match, NULL, NULL); > + if (err) > + pr_err("Failed to create MSC devices under caches\n"); > + } > +} [...] > diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h > new file mode 100644 > index 000000000000..07e0f240eaca > --- /dev/null > +++ b/drivers/resctrl/mpam_internal.h > @@ -0,0 +1,62 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +// Copyright (C) 2024 Arm Ltd. > + > +#ifndef MPAM_INTERNAL_H > +#define MPAM_INTERNAL_H > + > +#include <linux/arm_mpam.h> > +#include <linux/cpumask.h> > +#include <linux/io.h> > +#include <linux/mailbox_client.h> > +#include <linux/mutex.h> > +#include <linux/resctrl.h> > +#include <linux/sizes.h> > + > +struct mpam_msc { > + /* member of mpam_all_msc */ > + struct list_head glbl_list; It is worth making these names less mismatched? > + > + int id; > + struct platform_device *pdev; > + > + /* Not modified after mpam_is_enabled() becomes true */ > + enum mpam_msc_iface iface; > + u32 pcc_subspace_id; > + struct mbox_client pcc_cl; > + struct pcc_mbox_chan *pcc_chan; > + u32 nrdy_usec; > + cpumask_t accessibility; > + > + /* > + * probe_lock is only take during discovery. After discovery these > + * properties become read-only and the lists are protected by SRCU. > + */ > + struct mutex probe_lock; Can we have more clarify about the locking strategy, including details of which things each lock is supposed to apply to and when, and how (if at all) the locks are intended to nest? (Similarly for the global locks.) > + unsigned long ris_idxs[128 / BITS_PER_LONG]; > + u32 ris_max; nrdy_usec, ris_idxs and ris_max appear unused in this patch (though I suppose they get initialised by virtue of kzalloc()). Is this intentional? > + > + /* mpam_msc_ris of this component */ > + struct list_head ris; > + > + /* > + * part_sel_lock protects access to the MSC hardware registers that are > + * affected by MPAMCFG_PART_SEL. (including the ID registers that vary > + * by RIS). > + * If needed, take msc->lock first. > + */ What's msc->lock ? > + struct mutex part_sel_lock; > + > + /* > + * mon_sel_lock protects access to the MSC hardware registers that are > + * affeted by MPAMCFG_MON_SEL. > + * If needed, take msc->lock first. > + */ Same here. > + struct mutex outer_mon_sel_lock; > + raw_spinlock_t inner_mon_sel_lock; > + unsigned long inner_mon_sel_flags; > + > + void __iomem *mapped_hwpage; > + size_t mapped_hwpage_sz; > +}; > + > +#endif /* MPAM_INTERNAL_H */ [...] Cheers ---Dave
Hi Dave, On 01/09/2025 12:21, Dave Martin wrote: > On Fri, Aug 22, 2025 at 03:29:51PM +0000, James Morse wrote: >> Probing MPAM is convoluted. MSCs that are integrated with a CPU may >> only be accessible from those CPUs, and they may not be online. >> Touching the hardware early is pointless as MPAM can't be used until >> the system-wide common values for num_partid and num_pmg have been >> discovered. >> >> Start with driver probe/remove and mapping the MSC. >> diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig >> new file mode 100644 >> index 000000000000..dff7b87280ab >> --- /dev/null >> +++ b/drivers/resctrl/Kconfig >> @@ -0,0 +1,11 @@ >> +# Confusingly, this is everything but the CPU bits of MPAM. CPU here means >> +# CPU resources, not containers or cgroups etc. > > Drop confusing comment? > > CPUs are not mentioned other than in the comment -- I think the > descriptions are sufficiently self-explanatory that they don't read > onto CPUs. This used to add ARM_CPU_RESCTRL, to mirror X86_CPU_RESCTRL. It's been tidied up since then, but the comment remains. I'll remove it. >> +config ARM64_MPAM_DRIVER >> + bool "MPAM driver for System IP, e,g. caches and memory controllers" >> + depends on ARM64_MPAM && EXPERT >> + >> +config ARM64_MPAM_DRIVER_DEBUG >> + bool "Enable debug messages from the MPAM driver." > > Nit: spurious full stop. > > (i.e., people don't add one in these one-line descriptions. > They are title-like and self-delimiting, even when the text is a valid > sentence.) /me waves hands around >> + depends on ARM64_MPAM_DRIVER >> + help >> + Say yes here to enable debug messages from the MPAM driver. >> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c >> new file mode 100644 >> index 000000000000..a0d9a699a6e7 >> --- /dev/null >> +++ b/drivers/resctrl/mpam_devices.c >> @@ -0,0 +1,336 @@ >> +/* >> + * mpam_list_lock protects the SRCU lists when writing. Once the >> + * mpam_enabled key is enabled these lists are read-only, >> + * unless the error interrupt disables the driver. >> + */ >> +static DEFINE_MUTEX(mpam_list_lock); >> +static LIST_HEAD(mpam_all_msc); >> + >> +static struct srcu_struct mpam_srcu; >> + >> +/* MPAM isn't available until all the MSC have been probed. */ > > Comment doesn't really explain the variable. > > Maybe something like "Number of MSCs that need to be probed for MPAM > to be usable" ? Its the count not the remainder. I went with: | * Number of MSCs that have been probed. Once all MSC have been probed MPAM | * can be enabled. >> +static u32 mpam_num_msc; > > Any particular reason this is u32 and not unsigned int? u32 is less typing! > How are accesses to this protected against data races? It's under the list-lock, but after Rob's feedback I've made it an atomic_t and stopped using it as an id for all the print messages. > If there are supposed to be locks to protect globals in the MPAM driver, > is it worth wrapping them in access functions with a lockdep assert? > Otherwise, it feels rather easy to get this wrong -- I think I've found > at least one bug (see mpam_msc_drv_probe().) Broadly: everything is protected by the list_lock when things are being discovered. Once everything has been discovered, these things can become read-only. It's not until everything has been discovered that interrupts get registered, and things like a potential PMU driver could make calls in a strange context. Adding helpers would need some global state variable, if (state == foo) lockdep_assert... I had that early on, but figured it was overkill. >> +static void mpam_discovery_complete(void) >> +{ >> + pr_err("Discovered all MSC\n"); >> +} > As others have commented, if this is non-functional code that gets > removed later on, it's probably best to drop this up-front? It's illustrating that something happens after all the MSC have been discovered. Knowing that from the beginning of the series is supposed to make the insertion of the cpuhp notifiers in the middle easier to think about... >> +static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np, >> + u32 ris_idx) >> +{ >> + int err = 0; >> + u32 level = 0; >> + unsigned long cache_id; >> + struct device_node *cache; >> + >> + do { >> + if (of_device_is_compatible(np, "arm,mpam-cache")) { >> + cache = of_parse_phandle(np, "arm,mpam-device", 0); >> + if (!cache) { >> + pr_err("Failed to read phandle\n"); >> + break; >> + } >> + } else if (of_device_is_compatible(np->parent, "cache")) { >> + cache = of_node_get(np->parent); >> + } else { >> + /* For now, only caches are supported */ >> + cache = NULL; >> + break; >> + } >> + >> + err = of_property_read_u32(cache, "cache-level", &level); >> + if (err) { >> + pr_err("Failed to read cache-level\n"); >> + break; >> + } >> + >> + cache_id = cache_of_calculate_id(cache); >> + if (cache_id == ~0UL) { > > The type of cache_id may change if the return type of > cache_of_calculate_id() changes (see comments on patch 1). Yup, > Possible #define for the exceptional value. I don't think its any more surprising than '-1' as an error value, and its only got one caller, which is pretty obviously an error path. >> + err = -ENOENT; >> + break; > > The lack of a diagnostic here is inconsistent with the level of > diagnostics in the rest of the loop. I've never needed to debug that one because its already visible to user-space. If the cache-id's are missing, you can tell that in sysfs, you don't need to instrument the kernel. I'll add one if you think its important. They can all be _once, and as its related to the probing of a particular device, can use the dev_ print helpers. >> + } >> + >> + err = mpam_ris_create(msc, ris_idx, MPAM_CLASS_CACHE, level, >> + cache_id); >> + } while (0); > > Abuse of do ... while () here? > > There is no loop. The breaks are stealth "goto"s to this statement: Yes. The alternative would be actual gotos - which is surely worse! It just wasn't worth pulling this out as a separate function. >> + of_node_put(cache); > > (It works either way, but maybe gotos to an explicit label would be > more readable, as well as avoiding an unnecessary level of indentation.) As the cleanup magic has become fashionable, I'll switch to using that... >> + >> + return err; >> +} > > [...] > >> +/* >> + * An MSC can control traffic from a set of CPUs, but may only be accessible >> + * from a (hopefully wider) set of CPUs. The common reason for this is power >> + * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the >> + * the corresponding cache may also be powered off. By making accesses from > > Nit: the the Fixed, >> + * one of those CPUs, we ensure this isn't the case. >> + */ >> +static int fw_num_msc; > > Does this need to be protected against data races? > > If individual mpam_msc_drv_probe() calls may execute on different CPUs > from mpam_msc_driver_init(), then seem to be potential races here. Incrementing was under the list-lock, but not the last 'are they all done' read. Following Rob's comments I've made this an atomic_t. >> + >> +static void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg) >> +{ >> + /* TODO: wake up tasks blocked on this MSC's PCC channel */ > > So, is this broken in this commit?> > (If the series does not get broken up or applied piecemail, that's not > such a concern, though.) Unsupported - or at least only enough to not mistake them for MMIO devices. I've pulled this out to a later patch in the tree that isn't in this series. The platforms that need this haven't yet materialised, (and may not!). There is a prototype for DT/SCMI, but nothing I've seen yet for ACPI. >> +} >> + >> +static void mpam_msc_drv_remove(struct platform_device *pdev) >> +{ > > The MPAM driver cannot currently be built as a module. > > Is it possible to exercise the driver remove paths, today? Yes, through the sysfs unbind interface. It doesn't make a lot of sense for MPAM as the moment you unbind the driver from one MSC it has to work out if it needs to teardown resctrl... >> + struct mpam_msc *msc = platform_get_drvdata(pdev); >> + >> + if (!msc) >> + return; >> + >> + mutex_lock(&mpam_list_lock); >> + mpam_num_msc--; >> + platform_set_drvdata(pdev, NULL); >> + list_del_rcu(&msc->glbl_list); >> + synchronize_srcu(&mpam_srcu); >> + devm_kfree(&pdev->dev, msc); >> + mutex_unlock(&mpam_list_lock); >> +} >> + >> +static int mpam_msc_drv_probe(struct platform_device *pdev) >> +{ >> + int err; >> + struct mpam_msc *msc; >> + struct resource *msc_res; >> + void *plat_data = pdev->dev.platform_data; >> + >> + mutex_lock(&mpam_list_lock); >> + do { >> + msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); >> + if (!msc) { >> + err = -ENOMEM; >> + break; >> + } >> + >> + mutex_init(&msc->probe_lock); >> + mutex_init(&msc->part_sel_lock); >> + mutex_init(&msc->outer_mon_sel_lock); >> + raw_spin_lock_init(&msc->inner_mon_sel_lock); >> + msc->id = mpam_num_msc++; >> + msc->pdev = pdev; >> + INIT_LIST_HEAD_RCU(&msc->glbl_list); >> + INIT_LIST_HEAD_RCU(&msc->ris); >> + >> + err = update_msc_accessibility(msc); >> + if (err) >> + break; >> + if (cpumask_empty(&msc->accessibility)) { >> + pr_err_once("msc:%u is not accessible from any CPU!", >> + msc->id); >> + err = -EINVAL; >> + break; >> + } >> + >> + if (device_property_read_u32(&pdev->dev, "pcc-channel", >> + &msc->pcc_subspace_id)) >> + msc->iface = MPAM_IFACE_MMIO; >> + else >> + msc->iface = MPAM_IFACE_PCC; >> + >> + if (msc->iface == MPAM_IFACE_MMIO) { >> + void __iomem *io; >> + >> + io = devm_platform_get_and_ioremap_resource(pdev, 0, >> + &msc_res); >> + if (IS_ERR(io)) { >> + pr_err("Failed to map MSC base address\n"); >> + err = PTR_ERR(io); >> + break; >> + } >> + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; >> + msc->mapped_hwpage = io; >> + } else if (msc->iface == MPAM_IFACE_PCC) { >> + msc->pcc_cl.dev = &pdev->dev; >> + msc->pcc_cl.rx_callback = mpam_pcc_rx_callback; >> + msc->pcc_cl.tx_block = false; >> + msc->pcc_cl.tx_tout = 1000; /* 1s */ >> + msc->pcc_cl.knows_txdone = false; >> + >> + msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl, >> + msc->pcc_subspace_id); >> + if (IS_ERR(msc->pcc_chan)) { >> + pr_err("Failed to request MSC PCC channel\n"); >> + err = PTR_ERR(msc->pcc_chan); >> + break; >> + } >> + } > Should the lock be held across initialisation of the msc fields? The msc isn't visible until its added to the list, so provided all that inialisation is done 'before' its added to the list, it doesn't matter. > list_add_rcu() might imply sufficient barriers to ensure that the > initialisations are visible to other threads that obtain the msc > pointer by iterating over mpam_all_msc. > > It's probably cleaner to hold the lock explicitly, though. The list lock? We do. But the readers don't need to take the list lock, its only there to prevent concurrent writers. > What other ways of obtaining the msc pointer exist? The class/component/device structures in a subsequent patch, protected in the same way. Once MPAM is enabled all that can be sprayed through resctrl - at which point no modifications are allowed, and teardown for fatal errors depends on the static-key. >> + >> + list_add_rcu(&msc->glbl_list, &mpam_all_msc); >> + platform_set_drvdata(pdev, msc); >> + } while (0); >> + mutex_unlock(&mpam_list_lock); >> + >> + if (!err) { >> + /* Create RIS entries described by firmware */ >> + if (!acpi_disabled) >> + err = acpi_mpam_parse_resources(msc, plat_data); >> + else >> + err = mpam_dt_parse_resources(msc, plat_data); >> + } >> + >> + if (!err && fw_num_msc == mpam_num_msc) > Unlocked read of mpam_num_msc? Fixed as an atomic_t flavoured thing. >> + mpam_discovery_complete(); >> + >> + if (err && msc) >> + mpam_msc_drv_remove(pdev); >> + >> + return err; >> +} >> +/* >> + * MSC that are hidden under caches are not created as platform devices >> + * as there is no cache driver. Caches are also special-cased in >> + * update_msc_accessibility(). >> + */ > > Can you elaborate? I don't understand quite what this is doing. / { my_thing { compatible = "my_thing"; msc { compatible = "arm,mpam-msc"; }; }; other_thing { compatible = "other_thing"; }; msc { compatible = "arm,mpam-msc"; arm,mpam-device = <&other_thing>; }; l2-cache { compatible = "cache"; msc { compatible = "arm,mpam-msc"; }; }; }; my_thing and other_thing's MSC will have devices created - but the cache will not, because it's a cache not a device, and anything below it is ignored. >> +static void mpam_dt_create_foundling_msc(void) >> +{ >> + int err; >> + struct device_node *cache; >> + >> + for_each_compatible_node(cache, NULL, "cache") { >> + err = of_platform_populate(cache, mpam_of_match, NULL, NULL); >> + if (err) >> + pr_err("Failed to create MSC devices under caches\n"); >> + } >> +} > > [...] > >> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h >> new file mode 100644 >> index 000000000000..07e0f240eaca >> --- /dev/null >> +++ b/drivers/resctrl/mpam_internal.h >> @@ -0,0 +1,62 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +// Copyright (C) 2024 Arm Ltd. >> + >> +#ifndef MPAM_INTERNAL_H >> +#define MPAM_INTERNAL_H >> + >> +#include <linux/arm_mpam.h> >> +#include <linux/cpumask.h> >> +#include <linux/io.h> >> +#include <linux/mailbox_client.h> >> +#include <linux/mutex.h> >> +#include <linux/resctrl.h> >> +#include <linux/sizes.h> >> + >> +struct mpam_msc { >> + /* member of mpam_all_msc */ >> + struct list_head glbl_list; > It is worth making these names less mismatched? all_msc_list ? It's because its global. The pattern otherwise is parent has a list foo, and all the children have a member 'foo_list'. >> + >> + int id; >> + struct platform_device *pdev; >> + >> + /* Not modified after mpam_is_enabled() becomes true */ >> + enum mpam_msc_iface iface; >> + u32 pcc_subspace_id; >> + struct mbox_client pcc_cl; >> + struct pcc_mbox_chan *pcc_chan; >> + u32 nrdy_usec; >> + cpumask_t accessibility; >> + >> + /* >> + * probe_lock is only take during discovery. After discovery these >> + * properties become read-only and the lists are protected by SRCU. >> + */ >> + struct mutex probe_lock; > > Can we have more clarify about the locking strategy, including details > of which things each lock is supposed to apply to and when, and how (if > at all) the locks are intended to nest? The comment above is supposed to describe that. A rule of thumb is its all the stuff 'below' it in the struct, there is also a comment for each order. This one is a bit of a catch all for the values in this struct that can be written. The purpose of the part_sel_lock and mon_sel_lock arrangement becomes much more obvious in subsequent patches when the driver starts accessing the registers of those names. > (Similarly for the global locks.) > >> + unsigned long ris_idxs[128 / BITS_PER_LONG]; >> + u32 ris_max; > nrdy_usec, ris_idxs and ris_max appear unused in this patch (though I > suppose they get initialised by virtue of kzalloc()). Is this > intentional? To reduce the amount of churn in the series the bulk of the structure is added here, then the stuff that build more complicated structures, accesses hardware, deals with interrupts etc. >> + >> + /* mpam_msc_ris of this component */ >> + struct list_head ris; >> + >> + /* >> + * part_sel_lock protects access to the MSC hardware registers that are >> + * affected by MPAMCFG_PART_SEL. (including the ID registers that vary >> + * by RIS). >> + * If needed, take msc->lock first. >> + */ > What's msc->lock ? The old name for probe_lock, before it was necessary to lock the hardware registers separately. I've fixed the name. Thanks, James
On Fri, Sep 05, 2025 at 07:49:37PM +0100, James Morse wrote: > Hi Dave, > > On 01/09/2025 12:21, Dave Martin wrote: > > On Fri, Aug 22, 2025 at 03:29:51PM +0000, James Morse wrote: > >> Probing MPAM is convoluted. MSCs that are integrated with a CPU may > >> only be accessible from those CPUs, and they may not be online. > >> Touching the hardware early is pointless as MPAM can't be used until > >> the system-wide common values for num_partid and num_pmg have been > >> discovered. > >> > >> Start with driver probe/remove and mapping the MSC. > >> diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig > >> new file mode 100644 > >> index 000000000000..dff7b87280ab > >> --- /dev/null > >> +++ b/drivers/resctrl/Kconfig > >> @@ -0,0 +1,11 @@ > >> +# Confusingly, this is everything but the CPU bits of MPAM. CPU here means > >> +# CPU resources, not containers or cgroups etc. > > > > Drop confusing comment? > > > > CPUs are not mentioned other than in the comment -- I think the > > descriptions are sufficiently self-explanatory that they don't read > > onto CPUs. > > This used to add ARM_CPU_RESCTRL, to mirror X86_CPU_RESCTRL. > It's been tidied up since then, but the comment remains. > I'll remove it. OK > >> +config ARM64_MPAM_DRIVER > >> + bool "MPAM driver for System IP, e,g. caches and memory controllers" > >> + depends on ARM64_MPAM && EXPERT > >> + > >> +config ARM64_MPAM_DRIVER_DEBUG > >> + bool "Enable debug messages from the MPAM driver." > > > > Nit: spurious full stop. > > > > (i.e., people don't add one in these one-line descriptions. > > They are title-like and self-delimiting, even when the text is a valid > > sentence.) > > /me waves hands around I did say "Nit" ;) (This was mainly a "hmm, this doesn't look quite like the rest" thing.) > >> + depends on ARM64_MPAM_DRIVER > >> + help > >> + Say yes here to enable debug messages from the MPAM driver. > > >> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c > >> new file mode 100644 > >> index 000000000000..a0d9a699a6e7 > >> --- /dev/null > >> +++ b/drivers/resctrl/mpam_devices.c > >> @@ -0,0 +1,336 @@ > > >> +/* > >> + * mpam_list_lock protects the SRCU lists when writing. Once the > >> + * mpam_enabled key is enabled these lists are read-only, > >> + * unless the error interrupt disables the driver. > >> + */ > >> +static DEFINE_MUTEX(mpam_list_lock); > >> +static LIST_HEAD(mpam_all_msc); > >> + > >> +static struct srcu_struct mpam_srcu; > >> + > >> +/* MPAM isn't available until all the MSC have been probed. */ > > > > Comment doesn't really explain the variable. > > > > Maybe something like "Number of MSCs that need to be probed for MPAM > > to be usable" ? > > Its the count not the remainder. I went with: > | * Number of MSCs that have been probed. Once all MSC have been probed MPAM > | * can be enabled. Thanks, that's clearer. > >> +static u32 mpam_num_msc; > > > > Any particular reason this is u32 and not unsigned int? > u32 is less typing! But more effort for the reader / reviewer -- it's semantic noise. (It works either way though, obviosly. It looks like this code may have caught fixed-size-itis off the resctrl code, to some degree.) > > How are accesses to this protected against data races? > > It's under the list-lock, but after Rob's feedback I've made it an atomic_t and stopped > using it as an id for all the print messages. Converting to atomic_t reduces the chance of people asking the question, but doesn't really answer the question. Since mpam_num_msc shadows the contents of the lists and msc data structures, it may matter whether the two can be seen out of sync. Does it definitely not matter? > > If there are supposed to be locks to protect globals in the MPAM driver, > > is it worth wrapping them in access functions with a lockdep assert? > > Otherwise, it feels rather easy to get this wrong -- I think I've found > > at least one bug (see mpam_msc_drv_probe().) > Broadly: everything is protected by the list_lock when things are being discovered. > Once everything has been discovered, these things can become read-only. > > It's not until everything has been discovered that interrupts get registered, and things > like a potential PMU driver could make calls in a strange context. > > Adding helpers would need some global state variable, if (state == foo) lockdep_assert... > I had that early on, but figured it was overkill. I wonder whether it would be worth migrating this, so that the probe- time variables (which are read-write) can be kept separate from the run-time system description variables (which are mostly write-once). This would avoid having to support two different locking scenarios with a single mechanism. I have a bit of a concern that there are too many synchronisation mechanisms in use, with purposes that overlap and are in some cases not clealy described and not obvious -- at least, not to me. I don't think the fact that there have been few comments on this area necessarily indicates that other reviewers have fully understood the locking. > >> +static void mpam_discovery_complete(void) > >> +{ > >> + pr_err("Discovered all MSC\n"); > >> +} > > > As others have commented, if this is non-functional code that gets > > removed later on, it's probably best to drop this up-front? > It's illustrating that something happens after all the MSC have been discovered. > Knowing that from the beginning of the series is supposed to make the insertion of the > cpuhp notifiers in the middle easier to think about... So long as this was not unintentionally left behind when splitting the series, I guess it's OK to have it here -- as you say, it does motivate the shape that the code will eventually need to have. > >> +static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np, > >> + u32 ris_idx) > >> +{ > >> + int err = 0; > >> + u32 level = 0; > >> + unsigned long cache_id; > >> + struct device_node *cache; > >> + > >> + do { > >> + if (of_device_is_compatible(np, "arm,mpam-cache")) { > >> + cache = of_parse_phandle(np, "arm,mpam-device", 0); > >> + if (!cache) { > >> + pr_err("Failed to read phandle\n"); > >> + break; > >> + } > >> + } else if (of_device_is_compatible(np->parent, "cache")) { > >> + cache = of_node_get(np->parent); > >> + } else { > >> + /* For now, only caches are supported */ > >> + cache = NULL; > >> + break; > >> + } > >> + > >> + err = of_property_read_u32(cache, "cache-level", &level); > >> + if (err) { > >> + pr_err("Failed to read cache-level\n"); > >> + break; > >> + } > >> + > >> + cache_id = cache_of_calculate_id(cache); > >> + if (cache_id == ~0UL) { > > > > The type of cache_id may change if the return type of > > cache_of_calculate_id() changes (see comments on patch 1). > > Yup, > > > Possible #define for the exceptional value. > > I don't think its any more surprising than '-1' as an error value, and its only got one > caller, which is pretty obviously an error path. A #define is not essential. (As I say elsewhere, I don't entirely trust uses of ~ where the types may be mixed. But this case look low- isk for future maintenance.) > >> + err = -ENOENT; > >> + break; > > > > The lack of a diagnostic here is inconsistent with the level of > > diagnostics in the rest of the loop. > > I've never needed to debug that one because its already visible to user-space. If the > cache-id's are missing, you can tell that in sysfs, you don't need to instrument the kernel. > > I'll add one if you think its important. They can all be _once, and as its related to > the probing of a particular device, can use the dev_ print helpers. No, I guess that's fine. I don't have a strong feel for which things a user is likely to have to debug in practice, and I accept that we shouldn't try to cover absolutely everything. > >> + } > >> + > >> + err = mpam_ris_create(msc, ris_idx, MPAM_CLASS_CACHE, level, > >> + cache_id); > >> + } while (0); > > > > Abuse of do ... while () here? > > > > There is no loop. The breaks are stealth "goto"s to this statement: > Yes. The alternative would be actual gotos - which is surely worse! > It just wasn't worth pulling this out as a separate function. Again, it's semantic noise. do ... while is a looping construct. If you write "do {", the reader expects a loop and has to analyse the code in order to conclude that there isn't. Using a loop in order to avoid giving a label to the sequence point at the closing bracket is a neat trick, but it's not helpful to someone reading the code. I can live with it, but as an idiom this seems rare. Use of gotos for doing cleanup seems to be the most common idiom for this in the kernel. It may be inelegant, but it is likely to be readily understood. However... > > > >> + of_node_put(cache); > > > > (It works either way, but maybe gotos to an explicit label would be > > more readable, as well as avoiding an unnecessary level of indentation.) > As the cleanup magic has become fashionable, I'll switch to using that... ...I guess that works, too. (Not that I much like that clunky bolt-on extension to the language, but it should do the job and at least avoids arguments about precisely how or where the cleanup happens.) > >> + > >> + return err; > >> +} > > > > [...] > > > >> +/* > >> + * An MSC can control traffic from a set of CPUs, but may only be accessible > >> + * from a (hopefully wider) set of CPUs. The common reason for this is power > >> + * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the > >> + * the corresponding cache may also be powered off. By making accesses from > > > > Nit: the the > > Fixed, > > > >> + * one of those CPUs, we ensure this isn't the case. > >> + */ > > >> +static int fw_num_msc; > > > > Does this need to be protected against data races? > > > > If individual mpam_msc_drv_probe() calls may execute on different CPUs > > from mpam_msc_driver_init(), then seem to be potential races here. > > Incrementing was under the list-lock, but not the last 'are they all done' read. Following > Rob's comments I've made this an atomic_t. As with mpam_num_msc, this eliminates data races on fw_num_msc, but races between this variable and other data structures may remain. Can you explain what prevents such races, or why they don't matter? > > > >> + > >> +static void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg) > >> +{ > >> + /* TODO: wake up tasks blocked on this MSC's PCC channel */ > > > > So, is this broken in this commit?> > > (If the series does not get broken up or applied piecemail, that's not > > such a concern, though.) > Unsupported - or at least only enough to not mistake them for MMIO devices. > I've pulled this out to a later patch in the tree that isn't in this series. > > The platforms that need this haven't yet materialised, (and may not!). > There is a prototype for DT/SCMI, but nothing I've seen yet for ACPI. OK; I guess deferring the introduction of this until there is more context for it makes sense. > >> +} > >> + > >> +static void mpam_msc_drv_remove(struct platform_device *pdev) > >> +{ > > > > The MPAM driver cannot currently be built as a module. > > > > Is it possible to exercise the driver remove paths, today? > Yes, through the sysfs unbind interface. > > It doesn't make a lot of sense for MPAM as the moment you unbind the driver from one MSC > it has to work out if it needs to teardown resctrl... Has to, but doesn't? Have I missed something? Are we supposed to put the MSC back into a sane state, for e.g. the kexec path? For resctrl, one option would be to stub out the backend -- i.e., we don't tell resctrl that the affected resources disappeared, but attempts to manipulate the affected MSC(s) are stubbed out (similarly to what happens to an open tty after a hangup). (Something along these lines may be done somewhere, but I'm not currently aware of it.) Is there an outstanding get on the device that prevents us from getting here until resctrl is shut down? Thanks to the wisdom and restraint of systemd I'd expect resctrl to tangled up in some rat's nest of unremovable mounts by the time we try to shut down, but I hope I'm being pessimistic. (Arguably that's not our bug, if so.) > >> + struct mpam_msc *msc = platform_get_drvdata(pdev); > >> + > >> + if (!msc) > >> + return; > >> + > >> + mutex_lock(&mpam_list_lock); > >> + mpam_num_msc--; > >> + platform_set_drvdata(pdev, NULL); > >> + list_del_rcu(&msc->glbl_list); > >> + synchronize_srcu(&mpam_srcu); > >> + devm_kfree(&pdev->dev, msc); > >> + mutex_unlock(&mpam_list_lock); > >> +} > >> + > >> +static int mpam_msc_drv_probe(struct platform_device *pdev) > >> +{ > >> + int err; > >> + struct mpam_msc *msc; > >> + struct resource *msc_res; > >> + void *plat_data = pdev->dev.platform_data; > >> + > >> + mutex_lock(&mpam_list_lock); > >> + do { > >> + msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); > >> + if (!msc) { > >> + err = -ENOMEM; > >> + break; > >> + } > >> + > >> + mutex_init(&msc->probe_lock); > >> + mutex_init(&msc->part_sel_lock); > >> + mutex_init(&msc->outer_mon_sel_lock); > >> + raw_spin_lock_init(&msc->inner_mon_sel_lock); > >> + msc->id = mpam_num_msc++; > >> + msc->pdev = pdev; > >> + INIT_LIST_HEAD_RCU(&msc->glbl_list); msc->glbl_list is not a list head? Does it need to be initialised at all? list_add_rcu() will just splat it, by the looks of it. > >> + INIT_LIST_HEAD_RCU(&msc->ris); Maybe INIT_LIST_HEAD_RCU() isn't needed here. Do we ever access this list without holding one of the MSC locks? This list is not used until a cpuhp hook comes in to probe the MSC, and then mpam_discovery_cpu_online() obtains a pointer via list_for_each_entry() -- but this is not RCU-protected. The MSC probe lock is taken, there... > >> + > >> + err = update_msc_accessibility(msc); > >> + if (err) > >> + break; > >> + if (cpumask_empty(&msc->accessibility)) { > >> + pr_err_once("msc:%u is not accessible from any CPU!", > >> + msc->id); > >> + err = -EINVAL; > >> + break; > >> + } > >> + > >> + if (device_property_read_u32(&pdev->dev, "pcc-channel", > >> + &msc->pcc_subspace_id)) > >> + msc->iface = MPAM_IFACE_MMIO; > >> + else > >> + msc->iface = MPAM_IFACE_PCC; > >> + > >> + if (msc->iface == MPAM_IFACE_MMIO) { > >> + void __iomem *io; > >> + > >> + io = devm_platform_get_and_ioremap_resource(pdev, 0, > >> + &msc_res); > >> + if (IS_ERR(io)) { > >> + pr_err("Failed to map MSC base address\n"); > >> + err = PTR_ERR(io); > >> + break; > >> + } > >> + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; > >> + msc->mapped_hwpage = io; > >> + } else if (msc->iface == MPAM_IFACE_PCC) { > >> + msc->pcc_cl.dev = &pdev->dev; > >> + msc->pcc_cl.rx_callback = mpam_pcc_rx_callback; > >> + msc->pcc_cl.tx_block = false; > >> + msc->pcc_cl.tx_tout = 1000; /* 1s */ > >> + msc->pcc_cl.knows_txdone = false; ... so, it feels like we may need to hold the probe lock, or ensure that all iterations over the msc list are RCU-protected (see below for counterexamples), or both. > >> + > >> + msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl, > >> + msc->pcc_subspace_id); > >> + if (IS_ERR(msc->pcc_chan)) { > >> + pr_err("Failed to request MSC PCC channel\n"); > >> + err = PTR_ERR(msc->pcc_chan); > >> + break; > >> + } > >> + } > > > Should the lock be held across initialisation of the msc fields? > > The msc isn't visible until its added to the list, so provided all that inialisation is > done 'before' its added to the list, it doesn't matter. Is it possible for other code to get a pointer to the new msc after this, other than by dereferencing the list? (The obvious case is the interrupt handlers, but it looks like the msc pointers used for registering the interrupts are indeed obtained through an RCU-protected iteration over mpam_all_msc.) Note, there seem to be non-RCU-protected iterations over mpam_all_msc in the mpam_discovery_cpu_online() (patch 14) and mpam_enable_merge_features() paths (patch 18). The lack of symmetry between list maintenance and consumption look a little suspect for those -- is safety ensured in some other way? > > list_add_rcu() might imply sufficient barriers to ensure that the > > initialisations are visible to other threads that obtain the msc > > pointer by iterating over mpam_all_msc. > > > > It's probably cleaner to hold the lock explicitly, though. > > The list lock? We do. > But the readers don't need to take the list lock, its only there to prevent concurrent > writers. I meant whatever lock is supposed to protect the fields of the specific msc. The list lock is not that lock. > > > > What other ways of obtaining the msc pointer exist? > The class/component/device structures in a subsequent patch, protected in the same way. > Once MPAM is enabled all that can be sprayed through resctrl - at which point no > modifications are allowed, and teardown for fatal errors depends on the static-key. I'll bear this in mind as I review. > >> + > >> + list_add_rcu(&msc->glbl_list, &mpam_all_msc); > >> + platform_set_drvdata(pdev, msc); > >> + } while (0); > >> + mutex_unlock(&mpam_list_lock); > >> + > >> + if (!err) { > >> + /* Create RIS entries described by firmware */ > >> + if (!acpi_disabled) > >> + err = acpi_mpam_parse_resources(msc, plat_data); > >> + else > >> + err = mpam_dt_parse_resources(msc, plat_data); > >> + } > >> + > >> + if (!err && fw_num_msc == mpam_num_msc) > > > Unlocked read of mpam_num_msc? > Fixed as an atomic_t flavoured thing. Ditto comments above about whether it is a problem that mpam_num_msc can now be seen out of sync with the list. > >> + mpam_discovery_complete(); > >> + > >> + if (err && msc) > >> + mpam_msc_drv_remove(pdev); > >> + > >> + return err; > >> +} > >> +/* > >> + * MSC that are hidden under caches are not created as platform devices > >> + * as there is no cache driver. Caches are also special-cased in > >> + * update_msc_accessibility(). > >> + */ > > > > Can you elaborate? I don't understand quite what this is doing. > > / { > my_thing { > compatible = "my_thing"; > msc { > compatible = "arm,mpam-msc"; > }; > }; > > other_thing { > compatible = "other_thing"; > }; > > msc { > compatible = "arm,mpam-msc"; > arm,mpam-device = <&other_thing>; > }; > > > l2-cache { > compatible = "cache"; > msc { > compatible = "arm,mpam-msc"; > }; > }; > }; > > my_thing and other_thing's MSC will have devices created - but the cache will not, because > it's a cache not a device, and anything below it is ignored. OK. Maybe reword as something like: --8<-- MSCs that are declared by the firmware as being part of a cache may not be created automatically as platform devices, since there is no dedicated cache driver. Deal with those MSCs here. -->8-- Maybe add a comment at update_msc_accessibility() that references this comment, instead of a reader of that function just needing to know that this comment is here? > > > >> +static void mpam_dt_create_foundling_msc(void) > >> +{ > >> + int err; > >> + struct device_node *cache; > >> + > >> + for_each_compatible_node(cache, NULL, "cache") { > >> + err = of_platform_populate(cache, mpam_of_match, NULL, NULL); > >> + if (err) > >> + pr_err("Failed to create MSC devices under caches\n"); > >> + } > >> +} > > > > [...] > > > >> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h > >> new file mode 100644 > >> index 000000000000..07e0f240eaca > >> --- /dev/null > >> +++ b/drivers/resctrl/mpam_internal.h > >> @@ -0,0 +1,62 @@ > >> +/* SPDX-License-Identifier: GPL-2.0 */ > >> +// Copyright (C) 2024 Arm Ltd. > >> + > >> +#ifndef MPAM_INTERNAL_H > >> +#define MPAM_INTERNAL_H > >> + > >> +#include <linux/arm_mpam.h> > >> +#include <linux/cpumask.h> > >> +#include <linux/io.h> > >> +#include <linux/mailbox_client.h> > >> +#include <linux/mutex.h> > >> +#include <linux/resctrl.h> > >> +#include <linux/sizes.h> > >> + > >> +struct mpam_msc { > >> + /* member of mpam_all_msc */ > >> + struct list_head glbl_list; > > > It is worth making these names less mismatched? > > all_msc_list ? > > It's because its global. The pattern otherwise is parent has a list foo, and all the > children have a member 'foo_list'. "all_msc", then? I'm not sure that it's essential for the list head's name to have "list" in it: that's clear from the type and from how it is used. "all" seems sufficient to imply that this is a list (?) It doesn't get much more global than "all". (This is purely cosmetic, of course.) > > > >> + > >> + int id; > >> + struct platform_device *pdev; > >> + > >> + /* Not modified after mpam_is_enabled() becomes true */ > >> + enum mpam_msc_iface iface; > >> + u32 pcc_subspace_id; > >> + struct mbox_client pcc_cl; > >> + struct pcc_mbox_chan *pcc_chan; > >> + u32 nrdy_usec; > >> + cpumask_t accessibility; > >> + > >> + /* > >> + * probe_lock is only take during discovery. After discovery these > >> + * properties become read-only and the lists are protected by SRCU. > >> + */ > >> + struct mutex probe_lock; > > > > Can we have more clarify about the locking strategy, including details > > of which things each lock is supposed to apply to and when, and how (if > > at all) the locks are intended to nest? > > The comment above is supposed to describe that. A rule of thumb is its all the stuff > 'below' it in the struct, there is also a comment for each order. This one is a bit of a > catch all for the values in this struct that can be written. I get that, but the devil is in the detail. A lock that is "a bit of a catch-all" would always need to be taken, if safety is the goal. I guess we should try to close out the other discussions about locking before working out whether anything else is needed, here. > The purpose of the part_sel_lock and mon_sel_lock arrangement becomes much more obvious in > subsequent patches when the driver starts accessing the registers of those names. It's fair enough to defer that context until later -- though arguably these could be added later when they are actually used. Not really worth resplitting the series just for that, though. > > > > (Similarly for the global locks.) > > > >> + unsigned long ris_idxs[128 / BITS_PER_LONG]; > >> + u32 ris_max; > > > nrdy_usec, ris_idxs and ris_max appear unused in this patch (though I > > suppose they get initialised by virtue of kzalloc()). Is this > > intentional? > > To reduce the amount of churn in the series the bulk of the structure is added here, then > the stuff that build more complicated structures, accesses hardware, deals with interrupts > etc. Ack, just checking that something wasn't unintentionally dropped. > >> + > >> + /* mpam_msc_ris of this component */ > >> + struct list_head ris; > >> + > >> + /* > >> + * part_sel_lock protects access to the MSC hardware registers that are > >> + * affected by MPAMCFG_PART_SEL. (including the ID registers that vary > >> + * by RIS). > >> + * If needed, take msc->lock first. > >> + */ > > > What's msc->lock ? > > The old name for probe_lock, before it was necessary to lock the hardware registers > separately. I've fixed the name. > > Thanks, > > James Ack Cheers ---Dave > >
Hi Dave, On 08/09/2025 16:25, Dave Martin wrote: > On Fri, Sep 05, 2025 at 07:49:37PM +0100, James Morse wrote: >> On 01/09/2025 12:21, Dave Martin wrote: >>> On Fri, Aug 22, 2025 at 03:29:51PM +0000, James Morse wrote: >>>> Probing MPAM is convoluted. MSCs that are integrated with a CPU may >>>> only be accessible from those CPUs, and they may not be online. >>>> Touching the hardware early is pointless as MPAM can't be used until >>>> the system-wide common values for num_partid and num_pmg have been >>>> discovered. >>>> >>>> Start with driver probe/remove and mapping the MSC. >>>> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c >>>> new file mode 100644 >>>> index 000000000000..a0d9a699a6e7 >>>> --- /dev/null >>>> +++ b/drivers/resctrl/mpam_devices.c >>>> @@ -0,0 +1,336 @@ >>> How are accesses to this protected against data races? >> >> It's under the list-lock, but after Rob's feedback I've made it an atomic_t and stopped >> using it as an id for all the print messages. > > Converting to atomic_t reduces the chance of people asking the > question, but doesn't really answer the question. > > Since mpam_num_msc shadows the contents of the lists and msc data > structures, it may matter whether the two can be seen out of sync. > > Does it definitely not matter? v2 uses the firmware-table id as the id. The only thing mpam_num_msc needs to do is spot which MSC was last to be probed so that the cpuhp calls can be registered. The amount of skid that happens on the way there doesn't matter. [..] >>>> +static int fw_num_msc; >>> >>> Does this need to be protected against data races? >>> >>> If individual mpam_msc_drv_probe() calls may execute on different CPUs >>> from mpam_msc_driver_init(), then seem to be potential races here. >> >> Incrementing was under the list-lock, but not the last 'are they all done' read. Following >> Rob's comments I've made this an atomic_t. > > As with mpam_num_msc, this eliminates data races on fw_num_msc, but > races between this variable and other data structures may remain. > > Can you explain what prevents such races, or why they don't > matter? See mpam_msc_driver_init(). The value is set before the driver is registered. It can't probe before its registered, so all the readers must happen after the writer. [..] >>>> +} >>>> + >>>> +static void mpam_msc_drv_remove(struct platform_device *pdev) >>>> +{ >>> >>> The MPAM driver cannot currently be built as a module. >>> >>> Is it possible to exercise the driver remove paths, today? >> Yes, through the sysfs unbind interface. >> >> It doesn't make a lot of sense for MPAM as the moment you unbind the driver from one MSC >> it has to work out if it needs to teardown resctrl... > > Has to, but doesn't? Have I missed something? Has to, and does: https://git.kernel.org/pub/scm/linux/kernel/git/morse/linux.git/tree/drivers/resctrl/mpam_devices.c?h=mpam/snapshot%2bextras/v6.17-rc2&id=41c768c1705d3fa0814bb1cb92c83280c872cb3e#n474 It's probably not what the user wants, but it is what they're asking for if they do this. > Are we supposed to put the MSC back into a sane state, for e.g. the > kexec path? kdump on panic() means you couldn't trust it to do this. The driver reset all the configurations during probing. It will initially run with whatever configuration was left in partid-0, there is very little we can do about this. > For resctrl, one option would be to stub out the backend -- i.e., > we don't tell resctrl that the affected resources disappeared, but > attempts to manipulate the affected MSC(s) are stubbed out (similarly > to what happens to an open tty after a hangup). > > (Something along these lines may be done somewhere, but I'm not > currently aware of it.) On teardown the cpuhp callbacks are unregistered, which makes all the CPUs and resctrl:domains appear offline, and the static key behind mpam_enabled() is disabled, which makes a bunch of paths return an error. The result is no more access to the hardware after an error has occured. The aim is to prevent accidentally muddling important/unimportant tasks due to PARTID truncation. > Is there an outstanding get on the device that prevents us from getting > here until resctrl is shut down? Thanks to the wisdom and restraint of > systemd I'd expect resctrl to tangled up in some rat's nest of > unremovable mounts by the time we try to shut down, but I hope I'm > being pessimistic. (Arguably that's not our bug, if so.) The resctrl_exit() path removes the kernfs internals from the mount point. Systemd is able to keep zombie mount points, but they end up being empty. (to reproduce this, put resctrl in fstab!) >>>> + struct mpam_msc *msc = platform_get_drvdata(pdev); >>>> + >>>> + if (!msc) >>>> + return; >>>> + >>>> + mutex_lock(&mpam_list_lock); >>>> + mpam_num_msc--; >>>> + platform_set_drvdata(pdev, NULL); >>>> + list_del_rcu(&msc->glbl_list); >>>> + synchronize_srcu(&mpam_srcu); >>>> + devm_kfree(&pdev->dev, msc); >>>> + mutex_unlock(&mpam_list_lock); >>>> +} >>>> + >>>> +static int mpam_msc_drv_probe(struct platform_device *pdev) >>>> +{ >>>> + int err; >>>> + struct mpam_msc *msc; >>>> + struct resource *msc_res; >>>> + void *plat_data = pdev->dev.platform_data; >>>> + >>>> + mutex_lock(&mpam_list_lock); >>>> + do { >>>> + msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); >>>> + if (!msc) { >>>> + err = -ENOMEM; >>>> + break; >>>> + } >>>> + >>>> + mutex_init(&msc->probe_lock); >>>> + mutex_init(&msc->part_sel_lock); >>>> + mutex_init(&msc->outer_mon_sel_lock); >>>> + raw_spin_lock_init(&msc->inner_mon_sel_lock); >>>> + msc->id = mpam_num_msc++; >>>> + msc->pdev = pdev; >>>> + INIT_LIST_HEAD_RCU(&msc->glbl_list); > msc->glbl_list is not a list head? Does it need to be initialised at all? > list_add_rcu() will just splat it, by the looks of it. It's the member of the global list, hence the comment above it in the struct. I've never known if the member 'list head' needs to be initialised or not, better safe than sorry. >>>> + INIT_LIST_HEAD_RCU(&msc->ris); > > Maybe INIT_LIST_HEAD_RCU() isn't needed here. Do we ever access this > list without holding one of the MSC locks? > > This list is not used until a cpuhp hook comes in to probe the MSC, and > then mpam_discovery_cpu_online() obtains a pointer via > list_for_each_entry() -- but this is not RCU-protected. The MSC probe > lock is taken, there... mpam_reprogram_msc(), mpam_msmon_reset_all_mbwu() and mpam_reset_msc() all do this under (s)rcu. The aim is to not take a lock on the read side. Once the cpuhp callbacks are registered the only thing that writes to these lists is the error interrupt teardown. >>>> + >>>> + err = update_msc_accessibility(msc); >>>> + if (err) >>>> + break; >>>> + if (cpumask_empty(&msc->accessibility)) { >>>> + pr_err_once("msc:%u is not accessible from any CPU!", >>>> + msc->id); >>>> + err = -EINVAL; >>>> + break; >>>> + } >>>> + >>>> + if (device_property_read_u32(&pdev->dev, "pcc-channel", >>>> + &msc->pcc_subspace_id)) >>>> + msc->iface = MPAM_IFACE_MMIO; >>>> + else >>>> + msc->iface = MPAM_IFACE_PCC; >>>> + >>>> + if (msc->iface == MPAM_IFACE_MMIO) { >>>> + void __iomem *io; >>>> + >>>> + io = devm_platform_get_and_ioremap_resource(pdev, 0, >>>> + &msc_res); >>>> + if (IS_ERR(io)) { >>>> + pr_err("Failed to map MSC base address\n"); >>>> + err = PTR_ERR(io); >>>> + break; >>>> + } >>>> + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; >>>> + msc->mapped_hwpage = io; >>>> + } else if (msc->iface == MPAM_IFACE_PCC) { >>>> + msc->pcc_cl.dev = &pdev->dev; >>>> + msc->pcc_cl.rx_callback = mpam_pcc_rx_callback; >>>> + msc->pcc_cl.tx_block = false; >>>> + msc->pcc_cl.tx_tout = 1000; /* 1s */ >>>> + msc->pcc_cl.knows_txdone = false; > > ... so, it feels like we may need to hold the probe lock, or ensure > that all iterations over the msc list are RCU-protected (see below for > counterexamples), or both. In here, the msc hasn't yet been added to the global list, so it can't be found by anyone. Anyone that does find it, found it by taking the list_lock and walking the global list. >>>> + >>>> + msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl, >>>> + msc->pcc_subspace_id); >>>> + if (IS_ERR(msc->pcc_chan)) { >>>> + pr_err("Failed to request MSC PCC channel\n"); >>>> + err = PTR_ERR(msc->pcc_chan); >>>> + break; >>>> + } >>>> + } >> >>> Should the lock be held across initialisation of the msc fields? >> >> The msc isn't visible until its added to the list, so provided all that inialisation is >> done 'before' its added to the list, it doesn't matter. > Is it possible for other code to get a pointer to the new msc after > this, other than by dereferencing the list? While MSC are still being created - nothing walks the list. There are no interrupts or hotplug callabacks. Once all the MSC have been found, hardware probing starts. During hardware probing, the cpuhp callback walks the global list under srcu. Once all the hardware has been probed, the cpuhp callbacks are swapped out, and mpam_enable_once() does things with static_keys and registering interrupts. After this point, the mpam_class list/tree is used under srcu, and yes interrupts know which MSC they are for. > (The obvious case is the interrupt handlers, but it looks like the msc > pointers used for registering the interrupts are indeed obtained > through an RCU-protected iteration over mpam_all_msc.) > > Note, there seem to be non-RCU-protected iterations over mpam_all_msc > in the mpam_discovery_cpu_online() (patch 14) and That was taking the write side lock unecessarily. I've fixed it to walk the list under srcu. > mpam_enable_merge_features() paths (patch 18). Holds the write side lock. > The lack of symmetry > between list maintenance and consumption look a little suspect for > those -- is safety ensured in some other way? Readers must use the RCU primitives to be safe against a concurrent writer. Writers must take some write side lock to be safe against each other - they don't need to use the RCU list-walking primitives. >>> list_add_rcu() might imply sufficient barriers to ensure that the >>> initialisations are visible to other threads that obtain the msc >>> pointer by iterating over mpam_all_msc. >>> >>> It's probably cleaner to hold the lock explicitly, though. >> >> The list lock? We do. >> But the readers don't need to take the list lock, its only there to prevent concurrent >> writers. > > I meant whatever lock is supposed to protect the fields of the specific > msc. The list lock is not that lock. We could bundle that under the probe_lock - but there would be no need to hold that here, the struct mpam_msc isn't reachable. [..] >>>> + mpam_discovery_complete(); >>>> + >>>> + if (err && msc) >>>> + mpam_msc_drv_remove(pdev); >>>> + >>>> + return err; >>>> +} >>>> +/* >>>> + * MSC that are hidden under caches are not created as platform devices >>>> + * as there is no cache driver. Caches are also special-cased in >>>> + * update_msc_accessibility(). >>>> + */ >>> >>> Can you elaborate? I don't understand quite what this is doing. >> >> / { >> my_thing { >> compatible = "my_thing"; >> msc { >> compatible = "arm,mpam-msc"; >> }; >> }; >> >> other_thing { >> compatible = "other_thing"; >> }; >> >> msc { >> compatible = "arm,mpam-msc"; >> arm,mpam-device = <&other_thing>; >> }; >> >> >> l2-cache { >> compatible = "cache"; >> msc { >> compatible = "arm,mpam-msc"; >> }; >> }; >> }; >> >> my_thing and other_thing's MSC will have devices created - but the cache will not, because >> it's a cache not a device, and anything below it is ignored. > > OK. Maybe reword as something like: > > --8<-- > > MSCs that are declared by the firmware as being part of a cache may not > be created automatically as platform devices, since there is no > dedicated cache driver. > > Deal with those MSCs here. > > -->8-- > > Maybe add a comment at update_msc_accessibility() that references this > comment, instead of a reader of that function just needing to know that > this comment is here? I'll just drop the bit about update_msc_accessibility(), its less relevant once the next patch adds the memory node in a similar way. (and even less relevant once I rip out DT support) >>>> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h >>>> new file mode 100644 >>>> index 000000000000..07e0f240eaca >>>> --- /dev/null >>>> +++ b/drivers/resctrl/mpam_internal.h >>>> @@ -0,0 +1,62 @@ >>>> +/* SPDX-License-Identifier: GPL-2.0 */ >>>> +// Copyright (C) 2024 Arm Ltd. >>>> + >>>> +#ifndef MPAM_INTERNAL_H >>>> +#define MPAM_INTERNAL_H >>>> + >>>> +#include <linux/arm_mpam.h> >>>> +#include <linux/cpumask.h> >>>> +#include <linux/io.h> >>>> +#include <linux/mailbox_client.h> >>>> +#include <linux/mutex.h> >>>> +#include <linux/resctrl.h> >>>> +#include <linux/sizes.h> >>>> + >>>> +struct mpam_msc { >>>> + /* member of mpam_all_msc */ >>>> + struct list_head glbl_list; >> >>> It is worth making these names less mismatched? >> >> all_msc_list ? >> >> It's because its global. The pattern otherwise is parent has a list foo, and all the >> children have a member 'foo_list'. > > "all_msc", then? > > I'm not sure that it's essential for the list head's name to have > "list" in it: that's clear from the type and from how it is used. > "all" seems sufficient to imply that this is a list (?) > > It doesn't get much more global than "all". > > (This is purely cosmetic, of course.) Ending in _list is part of a pattern with the class->component --> component->component_list relationship. We can discuss whatever suffix is best on v2 - as long as they all follow the same pattern! Thanks, James
Hi James, On 8/22/25 16:29, James Morse wrote: > Probing MPAM is convoluted. MSCs that are integrated with a CPU may > only be accessible from those CPUs, and they may not be online. > Touching the hardware early is pointless as MPAM can't be used until > the system-wide common values for num_partid and num_pmg have been > discovered. > > Start with driver probe/remove and mapping the MSC. > > CC: Carl Worth <carl@os.amperecomputing.com> > Signed-off-by: James Morse <james.morse@arm.com> > --- > Changes since RFC: > * Check for status=broken DT devices. > * Moved all the files around. > * Made Kconfig symbols depend on EXPERT > --- > arch/arm64/Kconfig | 1 + > drivers/Kconfig | 2 + > drivers/Makefile | 1 + > drivers/resctrl/Kconfig | 11 ++ > drivers/resctrl/Makefile | 4 + > drivers/resctrl/mpam_devices.c | 336 ++++++++++++++++++++++++++++++++ > drivers/resctrl/mpam_internal.h | 62 ++++++ > 7 files changed, 417 insertions(+) > create mode 100644 drivers/resctrl/Kconfig > create mode 100644 drivers/resctrl/Makefile > create mode 100644 drivers/resctrl/mpam_devices.c > create mode 100644 drivers/resctrl/mpam_internal.h > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index e51ccf1da102..ea3c54e04275 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -2062,6 +2062,7 @@ config ARM64_TLB_RANGE > > config ARM64_MPAM > bool "Enable support for MPAM" > + select ARM64_MPAM_DRIVER > select ACPI_MPAM if ACPI > help > Memory Partitioning and Monitoring is an optional extension > diff --git a/drivers/Kconfig b/drivers/Kconfig > index 4915a63866b0..3054b50a2f4c 100644 > --- a/drivers/Kconfig > +++ b/drivers/Kconfig > @@ -251,4 +251,6 @@ source "drivers/hte/Kconfig" > > source "drivers/cdx/Kconfig" > > +source "drivers/resctrl/Kconfig" > + > endmenu > diff --git a/drivers/Makefile b/drivers/Makefile > index b5749cf67044..f41cf4eddeba 100644 > --- a/drivers/Makefile > +++ b/drivers/Makefile > @@ -194,5 +194,6 @@ obj-$(CONFIG_HTE) += hte/ > obj-$(CONFIG_DRM_ACCEL) += accel/ > obj-$(CONFIG_CDX_BUS) += cdx/ > obj-$(CONFIG_DPLL) += dpll/ > +obj-y += resctrl/ > > obj-$(CONFIG_S390) += s390/ > diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig > new file mode 100644 > index 000000000000..dff7b87280ab > --- /dev/null > +++ b/drivers/resctrl/Kconfig > @@ -0,0 +1,11 @@ > +# Confusingly, this is everything but the CPU bits of MPAM. CPU here means > +# CPU resources, not containers or cgroups etc. > +config ARM64_MPAM_DRIVER > + bool "MPAM driver for System IP, e,g. caches and memory controllers" > + depends on ARM64_MPAM && EXPERT > + > +config ARM64_MPAM_DRIVER_DEBUG > + bool "Enable debug messages from the MPAM driver." > + depends on ARM64_MPAM_DRIVER > + help > + Say yes here to enable debug messages from the MPAM driver. > diff --git a/drivers/resctrl/Makefile b/drivers/resctrl/Makefile > new file mode 100644 > index 000000000000..92b48fa20108 > --- /dev/null > +++ b/drivers/resctrl/Makefile > @@ -0,0 +1,4 @@ > +obj-$(CONFIG_ARM64_MPAM_DRIVER) += mpam.o > +mpam-y += mpam_devices.o > + > +cflags-$(CONFIG_ARM64_MPAM_DRIVER_DEBUG) += -DDEBUG > diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c > new file mode 100644 > index 000000000000..a0d9a699a6e7 > --- /dev/null > +++ b/drivers/resctrl/mpam_devices.c > @@ -0,0 +1,336 @@ > +// SPDX-License-Identifier: GPL-2.0 > +// Copyright (C) 2025 Arm Ltd. > + > +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ > + > +#include <linux/acpi.h> > +#include <linux/arm_mpam.h> > +#include <linux/cacheinfo.h> > +#include <linux/cpu.h> > +#include <linux/cpumask.h> > +#include <linux/device.h> > +#include <linux/errno.h> > +#include <linux/gfp.h> > +#include <linux/list.h> > +#include <linux/lockdep.h> > +#include <linux/mutex.h> > +#include <linux/of.h> > +#include <linux/of_platform.h> > +#include <linux/platform_device.h> > +#include <linux/printk.h> > +#include <linux/slab.h> > +#include <linux/spinlock.h> > +#include <linux/srcu.h> > +#include <linux/types.h> > + > +#include <acpi/pcc.h> > + > +#include "mpam_internal.h" > + > +/* > + * mpam_list_lock protects the SRCU lists when writing. Once the > + * mpam_enabled key is enabled these lists are read-only, > + * unless the error interrupt disables the driver. > + */ > +static DEFINE_MUTEX(mpam_list_lock); > +static LIST_HEAD(mpam_all_msc); > + > +static struct srcu_struct mpam_srcu; > + > +/* MPAM isn't available until all the MSC have been probed. */ > +static u32 mpam_num_msc; > + > +static void mpam_discovery_complete(void) > +{ > + pr_err("Discovered all MSC\n"); > +} > + > +static int mpam_dt_count_msc(void) > +{ > + int count = 0; > + struct device_node *np; > + > + for_each_compatible_node(np, NULL, "arm,mpam-msc") { > + if (of_device_is_available(np)) > + count++; > + } > + > + return count; > +} > + > +static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np, > + u32 ris_idx) > +{ > + int err = 0; > + u32 level = 0; > + unsigned long cache_id; > + struct device_node *cache; > + > + do { > + if (of_device_is_compatible(np, "arm,mpam-cache")) { > + cache = of_parse_phandle(np, "arm,mpam-device", 0); > + if (!cache) { > + pr_err("Failed to read phandle\n"); > + break; > + } > + } else if (of_device_is_compatible(np->parent, "cache")) { > + cache = of_node_get(np->parent); > + } else { > + /* For now, only caches are supported */ > + cache = NULL; > + break; > + } > + > + err = of_property_read_u32(cache, "cache-level", &level); > + if (err) { > + pr_err("Failed to read cache-level\n"); > + break; > + } > + > + cache_id = cache_of_calculate_id(cache); > + if (cache_id == ~0UL) { > + err = -ENOENT; > + break; > + } > + > + err = mpam_ris_create(msc, ris_idx, MPAM_CLASS_CACHE, level, > + cache_id); > + } while (0); > + of_node_put(cache); > + > + return err; > +} > + > +static int mpam_dt_parse_resources(struct mpam_msc *msc, void *ignored) > +{ > + int err, num_ris = 0; > + const u32 *ris_idx_p; > + struct device_node *iter, *np; > + > + np = msc->pdev->dev.of_node; > + for_each_child_of_node(np, iter) { > + ris_idx_p = of_get_property(iter, "reg", NULL); > + if (ris_idx_p) { > + num_ris++; > + err = mpam_dt_parse_resource(msc, iter, *ris_idx_p); > + if (err) { > + of_node_put(iter); > + return err; > + } > + } > + } > + > + if (!num_ris) > + mpam_dt_parse_resource(msc, np, 0); > + > + return err; > +} > + > +/* > + * An MSC can control traffic from a set of CPUs, but may only be accessible > + * from a (hopefully wider) set of CPUs. The common reason for this is power > + * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the > + * the corresponding cache may also be powered off. By making accesses from > + * one of those CPUs, we ensure this isn't the case. > + */ > +static int update_msc_accessibility(struct mpam_msc *msc) > +{ > + struct device_node *parent; > + u32 affinity_id; > + int err; > + > + if (!acpi_disabled) { > + err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity", > + &affinity_id); > + if (err) > + cpumask_copy(&msc->accessibility, cpu_possible_mask); > + else > + acpi_pptt_get_cpus_from_container(affinity_id, > + &msc->accessibility); > + > + return 0; > + } > + > + /* This depends on the path to of_node */ > + parent = of_get_parent(msc->pdev->dev.of_node); > + if (parent == of_root) { > + cpumask_copy(&msc->accessibility, cpu_possible_mask); > + err = 0; > + } else { > + err = -EINVAL; > + pr_err("Cannot determine accessibility of MSC: %s\n", > + dev_name(&msc->pdev->dev)); > + } > + of_node_put(parent); > + > + return err; > +} > + > +static int fw_num_msc; > + > +static void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg) > +{ > + /* TODO: wake up tasks blocked on this MSC's PCC channel */ > +} > + > +static void mpam_msc_drv_remove(struct platform_device *pdev) > +{ > + struct mpam_msc *msc = platform_get_drvdata(pdev); > + > + if (!msc) > + return; > + > + mutex_lock(&mpam_list_lock); > + mpam_num_msc--; > + platform_set_drvdata(pdev, NULL); > + list_del_rcu(&msc->glbl_list); > + synchronize_srcu(&mpam_srcu); > + devm_kfree(&pdev->dev, msc); > + mutex_unlock(&mpam_list_lock); > +} > + > +static int mpam_msc_drv_probe(struct platform_device *pdev) > +{ > + int err; > + struct mpam_msc *msc; > + struct resource *msc_res; > + void *plat_data = pdev->dev.platform_data; > + > + mutex_lock(&mpam_list_lock); > + do { > + msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); > + if (!msc) { > + err = -ENOMEM; > + break; > + } > + > + mutex_init(&msc->probe_lock); > + mutex_init(&msc->part_sel_lock); > + mutex_init(&msc->outer_mon_sel_lock); > + raw_spin_lock_init(&msc->inner_mon_sel_lock); > + msc->id = mpam_num_msc++; > + msc->pdev = pdev; > + INIT_LIST_HEAD_RCU(&msc->glbl_list); > + INIT_LIST_HEAD_RCU(&msc->ris); > + > + err = update_msc_accessibility(msc); > + if (err) > + break; > + if (cpumask_empty(&msc->accessibility)) { > + pr_err_once("msc:%u is not accessible from any CPU!", > + msc->id); > + err = -EINVAL; > + break; > + } > + > + if (device_property_read_u32(&pdev->dev, "pcc-channel", > + &msc->pcc_subspace_id)) > + msc->iface = MPAM_IFACE_MMIO; > + else > + msc->iface = MPAM_IFACE_PCC; > + > + if (msc->iface == MPAM_IFACE_MMIO) { > + void __iomem *io; > + > + io = devm_platform_get_and_ioremap_resource(pdev, 0, > + &msc_res); > + if (IS_ERR(io)) { > + pr_err("Failed to map MSC base address\n"); > + err = PTR_ERR(io); > + break; > + } > + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; > + msc->mapped_hwpage = io; > + } else if (msc->iface == MPAM_IFACE_PCC) { > + msc->pcc_cl.dev = &pdev->dev; > + msc->pcc_cl.rx_callback = mpam_pcc_rx_callback; > + msc->pcc_cl.tx_block = false; > + msc->pcc_cl.tx_tout = 1000; /* 1s */ > + msc->pcc_cl.knows_txdone = false; > + > + msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl, > + msc->pcc_subspace_id); > + if (IS_ERR(msc->pcc_chan)) { > + pr_err("Failed to request MSC PCC channel\n"); > + err = PTR_ERR(msc->pcc_chan); > + break; > + } > + } > + > + list_add_rcu(&msc->glbl_list, &mpam_all_msc); > + platform_set_drvdata(pdev, msc); > + } while (0); > + mutex_unlock(&mpam_list_lock); > + > + if (!err) { > + /* Create RIS entries described by firmware */ > + if (!acpi_disabled) > + err = acpi_mpam_parse_resources(msc, plat_data); > + else > + err = mpam_dt_parse_resources(msc, plat_data); > + } > + > + if (!err && fw_num_msc == mpam_num_msc) > + mpam_discovery_complete(); > + > + if (err && msc) > + mpam_msc_drv_remove(pdev); > + > + return err; > +} > + > +static const struct of_device_id mpam_of_match[] = { > + { .compatible = "arm,mpam-msc", }, > + {}, > +}; > +MODULE_DEVICE_TABLE(of, mpam_of_match); > + > +static struct platform_driver mpam_msc_driver = { > + .driver = { > + .name = "mpam_msc", > + .of_match_table = of_match_ptr(mpam_of_match), > + }, > + .probe = mpam_msc_drv_probe, > + .remove = mpam_msc_drv_remove, > +}; > + > +/* > + * MSC that are hidden under caches are not created as platform devices > + * as there is no cache driver. Caches are also special-cased in > + * update_msc_accessibility(). > + */ > +static void mpam_dt_create_foundling_msc(void) > +{ > + int err; > + struct device_node *cache; > + > + for_each_compatible_node(cache, NULL, "cache") { > + err = of_platform_populate(cache, mpam_of_match, NULL, NULL); > + if (err) > + pr_err("Failed to create MSC devices under caches\n"); > + } > +} > + > +static int __init mpam_msc_driver_init(void) > +{ > + if (!system_supports_mpam()) > + return -EOPNOTSUPP; > + > + init_srcu_struct(&mpam_srcu); > + > + if (!acpi_disabled) > + fw_num_msc = acpi_mpam_count_msc(); > + else > + fw_num_msc = mpam_dt_count_msc(); > + > + if (fw_num_msc <= 0) { > + pr_err("No MSC devices found in firmware\n"); > + return -EINVAL; > + } > + > + if (acpi_disabled) > + mpam_dt_create_foundling_msc(); > + > + return platform_driver_register(&mpam_msc_driver); > +} > +subsys_initcall(mpam_msc_driver_init); > diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h > new file mode 100644 > index 000000000000..07e0f240eaca > --- /dev/null > +++ b/drivers/resctrl/mpam_internal.h > @@ -0,0 +1,62 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +// Copyright (C) 2024 Arm Ltd. > + > +#ifndef MPAM_INTERNAL_H > +#define MPAM_INTERNAL_H > + > +#include <linux/arm_mpam.h> > +#include <linux/cpumask.h> > +#include <linux/io.h> > +#include <linux/mailbox_client.h> > +#include <linux/mutex.h> > +#include <linux/resctrl.h> > +#include <linux/sizes.h> > + > +struct mpam_msc { > + /* member of mpam_all_msc */ > + struct list_head glbl_list; > + > + int id; > + struct platform_device *pdev; > + > + /* Not modified after mpam_is_enabled() becomes true */ > + enum mpam_msc_iface iface; > + u32 pcc_subspace_id; > + struct mbox_client pcc_cl; > + struct pcc_mbox_chan *pcc_chan; > + u32 nrdy_usec; > + cpumask_t accessibility; > + > + /* > + * probe_lock is only take during discovery. After discovery these > + * properties become read-only and the lists are protected by SRCU. > + */ > + struct mutex probe_lock; > + unsigned long ris_idxs[128 / BITS_PER_LONG]; Why is this sized this way? RIS_MAX is 4 bits and so there are at most 16 RIS per msc. > + u32 ris_max; > + > + /* mpam_msc_ris of this component */ > + struct list_head ris; > + > + /* > + * part_sel_lock protects access to the MSC hardware registers that are > + * affected by MPAMCFG_PART_SEL. (including the ID registers that vary > + * by RIS). > + * If needed, take msc->lock first. > + */ > + struct mutex part_sel_lock; > + > + /* > + * mon_sel_lock protects access to the MSC hardware registers that are > + * affeted by MPAMCFG_MON_SEL. > + * If needed, take msc->lock first. > + */ > + struct mutex outer_mon_sel_lock; > + raw_spinlock_t inner_mon_sel_lock; > + unsigned long inner_mon_sel_flags; > + > + void __iomem *mapped_hwpage; > + size_t mapped_hwpage_sz; > +}; > + > +#endif /* MPAM_INTERNAL_H */ Thanks, Ben
Hi Ben, On 01/09/2025 10:11, Ben Horgan wrote: > On 8/22/25 16:29, James Morse wrote: >> Probing MPAM is convoluted. MSCs that are integrated with a CPU may >> only be accessible from those CPUs, and they may not be online. >> Touching the hardware early is pointless as MPAM can't be used until >> the system-wide common values for num_partid and num_pmg have been >> discovered. >> >> Start with driver probe/remove and mapping the MSC. >> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h >> new file mode 100644 >> index 000000000000..07e0f240eaca >> --- /dev/null >> +++ b/drivers/resctrl/mpam_internal.h >> @@ -0,0 +1,62 @@ >> +struct mpam_msc { >> + /* member of mpam_all_msc */ >> + struct list_head glbl_list; >> + >> + int id; >> + struct platform_device *pdev; >> + >> + /* Not modified after mpam_is_enabled() becomes true */ >> + enum mpam_msc_iface iface; >> + u32 pcc_subspace_id; >> + struct mbox_client pcc_cl; >> + struct pcc_mbox_chan *pcc_chan; >> + u32 nrdy_usec; >> + cpumask_t accessibility; >> + >> + /* >> + * probe_lock is only take during discovery. After discovery these >> + * properties become read-only and the lists are protected by SRCU. >> + */ >> + struct mutex probe_lock; >> + unsigned long ris_idxs[128 / BITS_PER_LONG]; > Why is this sized this way? RIS_MAX is 4 bits and so there are at most > 16 RIS per msc. Hmmm, lost in time - I agree with the 16 reasoning. Fixed. (It's likely due to RES0 space above the field - but that has been filled in with other stuff since then. RIS was added as a 'backward compatible feature' - I was wary of them extending it) >> + u32 ris_max; >> + >> + /* mpam_msc_ris of this component */ >> + struct list_head ris; >> + >> + /* >> + * part_sel_lock protects access to the MSC hardware registers that are >> + * affected by MPAMCFG_PART_SEL. (including the ID registers that vary >> + * by RIS). >> + * If needed, take msc->lock first. >> + */ >> + struct mutex part_sel_lock; >> + >> + /* >> + * mon_sel_lock protects access to the MSC hardware registers that are >> + * affeted by MPAMCFG_MON_SEL. >> + * If needed, take msc->lock first. >> + */ >> + struct mutex outer_mon_sel_lock; >> + raw_spinlock_t inner_mon_sel_lock; >> + unsigned long inner_mon_sel_flags; >> + >> + void __iomem *mapped_hwpage; >> + size_t mapped_hwpage_sz; >> +}; >> + >> +#endif /* MPAM_INTERNAL_H */ Thanks, James
On Fri, Aug 22, 2025 at 10:32 AM James Morse <james.morse@arm.com> wrote: > > Probing MPAM is convoluted. MSCs that are integrated with a CPU may > only be accessible from those CPUs, and they may not be online. > Touching the hardware early is pointless as MPAM can't be used until > the system-wide common values for num_partid and num_pmg have been > discovered. > > Start with driver probe/remove and mapping the MSC. > > CC: Carl Worth <carl@os.amperecomputing.com> > Signed-off-by: James Morse <james.morse@arm.com> > --- > Changes since RFC: > * Check for status=broken DT devices. No such status... 'disabled' can be for a variety of reasons. > * Moved all the files around. > * Made Kconfig symbols depend on EXPERT > --- > arch/arm64/Kconfig | 1 + > drivers/Kconfig | 2 + > drivers/Makefile | 1 + > drivers/resctrl/Kconfig | 11 ++ > drivers/resctrl/Makefile | 4 + > drivers/resctrl/mpam_devices.c | 336 ++++++++++++++++++++++++++++++++ > drivers/resctrl/mpam_internal.h | 62 ++++++ > 7 files changed, 417 insertions(+) > create mode 100644 drivers/resctrl/Kconfig > create mode 100644 drivers/resctrl/Makefile > create mode 100644 drivers/resctrl/mpam_devices.c > create mode 100644 drivers/resctrl/mpam_internal.h > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index e51ccf1da102..ea3c54e04275 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -2062,6 +2062,7 @@ config ARM64_TLB_RANGE > > config ARM64_MPAM > bool "Enable support for MPAM" > + select ARM64_MPAM_DRIVER > select ACPI_MPAM if ACPI > help > Memory Partitioning and Monitoring is an optional extension > diff --git a/drivers/Kconfig b/drivers/Kconfig > index 4915a63866b0..3054b50a2f4c 100644 > --- a/drivers/Kconfig > +++ b/drivers/Kconfig > @@ -251,4 +251,6 @@ source "drivers/hte/Kconfig" > > source "drivers/cdx/Kconfig" > > +source "drivers/resctrl/Kconfig" > + > endmenu > diff --git a/drivers/Makefile b/drivers/Makefile > index b5749cf67044..f41cf4eddeba 100644 > --- a/drivers/Makefile > +++ b/drivers/Makefile > @@ -194,5 +194,6 @@ obj-$(CONFIG_HTE) += hte/ > obj-$(CONFIG_DRM_ACCEL) += accel/ > obj-$(CONFIG_CDX_BUS) += cdx/ > obj-$(CONFIG_DPLL) += dpll/ > +obj-y += resctrl/ > > obj-$(CONFIG_S390) += s390/ > diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig > new file mode 100644 > index 000000000000..dff7b87280ab > --- /dev/null > +++ b/drivers/resctrl/Kconfig > @@ -0,0 +1,11 @@ > +# Confusingly, this is everything but the CPU bits of MPAM. CPU here means > +# CPU resources, not containers or cgroups etc. > +config ARM64_MPAM_DRIVER > + bool "MPAM driver for System IP, e,g. caches and memory controllers" > + depends on ARM64_MPAM && EXPERT > + > +config ARM64_MPAM_DRIVER_DEBUG > + bool "Enable debug messages from the MPAM driver." > + depends on ARM64_MPAM_DRIVER > + help > + Say yes here to enable debug messages from the MPAM driver. > diff --git a/drivers/resctrl/Makefile b/drivers/resctrl/Makefile > new file mode 100644 > index 000000000000..92b48fa20108 > --- /dev/null > +++ b/drivers/resctrl/Makefile > @@ -0,0 +1,4 @@ > +obj-$(CONFIG_ARM64_MPAM_DRIVER) += mpam.o > +mpam-y += mpam_devices.o > + > +cflags-$(CONFIG_ARM64_MPAM_DRIVER_DEBUG) += -DDEBUG > diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c > new file mode 100644 > index 000000000000..a0d9a699a6e7 > --- /dev/null > +++ b/drivers/resctrl/mpam_devices.c > @@ -0,0 +1,336 @@ > +// SPDX-License-Identifier: GPL-2.0 > +// Copyright (C) 2025 Arm Ltd. Given the 2024 below, should this be 2024-2025? > + > +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ > + > +#include <linux/acpi.h> > +#include <linux/arm_mpam.h> > +#include <linux/cacheinfo.h> > +#include <linux/cpu.h> > +#include <linux/cpumask.h> > +#include <linux/device.h> > +#include <linux/errno.h> > +#include <linux/gfp.h> > +#include <linux/list.h> > +#include <linux/lockdep.h> > +#include <linux/mutex.h> > +#include <linux/of.h> > +#include <linux/of_platform.h> > +#include <linux/platform_device.h> > +#include <linux/printk.h> > +#include <linux/slab.h> > +#include <linux/spinlock.h> > +#include <linux/srcu.h> > +#include <linux/types.h> > + > +#include <acpi/pcc.h> > + > +#include "mpam_internal.h" > + > +/* > + * mpam_list_lock protects the SRCU lists when writing. Once the > + * mpam_enabled key is enabled these lists are read-only, > + * unless the error interrupt disables the driver. > + */ > +static DEFINE_MUTEX(mpam_list_lock); > +static LIST_HEAD(mpam_all_msc); > + > +static struct srcu_struct mpam_srcu; > + > +/* MPAM isn't available until all the MSC have been probed. */ > +static u32 mpam_num_msc; > + > +static void mpam_discovery_complete(void) > +{ > + pr_err("Discovered all MSC\n"); Perhaps print out how many MSCs. > +} > + > +static int mpam_dt_count_msc(void) > +{ > + int count = 0; > + struct device_node *np; > + > + for_each_compatible_node(np, NULL, "arm,mpam-msc") { > + if (of_device_is_available(np)) > + count++; > + } > + > + return count; > +} > + > +static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np, > + u32 ris_idx) > +{ > + int err = 0; > + u32 level = 0; > + unsigned long cache_id; > + struct device_node *cache; > + > + do { > + if (of_device_is_compatible(np, "arm,mpam-cache")) { > + cache = of_parse_phandle(np, "arm,mpam-device", 0); > + if (!cache) { > + pr_err("Failed to read phandle\n"); > + break; > + } > + } else if (of_device_is_compatible(np->parent, "cache")) { Don't access device_node members. I'm trying to make it opaque. And technically it can be racy to access parent ptr when/if nodes are dynamic. I think this should suffice: else { cache = of_get_parent(np); if (!of_device_is_compatible(cache, "cache")) { cache = NULL; break; } } > + cache = of_node_get(np->parent); > + } else { > + /* For now, only caches are supported */ > + cache = NULL; > + break; > + } > + > + err = of_property_read_u32(cache, "cache-level", &level); > + if (err) { > + pr_err("Failed to read cache-level\n"); > + break; > + } > + > + cache_id = cache_of_calculate_id(cache); > + if (cache_id == ~0UL) { > + err = -ENOENT; > + break; > + } > + > + err = mpam_ris_create(msc, ris_idx, MPAM_CLASS_CACHE, level, > + cache_id); > + } while (0); > + of_node_put(cache); > + > + return err; > +} > + > +static int mpam_dt_parse_resources(struct mpam_msc *msc, void *ignored) > +{ > + int err, num_ris = 0; > + const u32 *ris_idx_p; > + struct device_node *iter, *np; > + > + np = msc->pdev->dev.of_node; > + for_each_child_of_node(np, iter) { Use for_each_available_child_of_node_scoped() > + ris_idx_p = of_get_property(iter, "reg", NULL); This is broken on big endian and new users of of_get_property() are discouraged. Use of_property_read_reg(). > + if (ris_idx_p) { > + num_ris++; > + err = mpam_dt_parse_resource(msc, iter, *ris_idx_p); > + if (err) { > + of_node_put(iter); And then drop the put. > + return err; > + } > + } > + } > + > + if (!num_ris) > + mpam_dt_parse_resource(msc, np, 0); > + > + return err; > +} > + > +/* > + * An MSC can control traffic from a set of CPUs, but may only be accessible > + * from a (hopefully wider) set of CPUs. The common reason for this is power > + * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the > + * the corresponding cache may also be powered off. By making accesses from > + * one of those CPUs, we ensure this isn't the case. > + */ > +static int update_msc_accessibility(struct mpam_msc *msc) > +{ > + struct device_node *parent; > + u32 affinity_id; > + int err; > + > + if (!acpi_disabled) { > + err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity", > + &affinity_id); > + if (err) > + cpumask_copy(&msc->accessibility, cpu_possible_mask); > + else > + acpi_pptt_get_cpus_from_container(affinity_id, > + &msc->accessibility); > + > + return 0; > + } > + > + /* This depends on the path to of_node */ I'm failing to understand what has to be at the root node? > + parent = of_get_parent(msc->pdev->dev.of_node); > + if (parent == of_root) { > + cpumask_copy(&msc->accessibility, cpu_possible_mask); > + err = 0; > + } else { > + err = -EINVAL; > + pr_err("Cannot determine accessibility of MSC: %s\n", > + dev_name(&msc->pdev->dev)); > + } > + of_node_put(parent); > + > + return err; > +} > + > +static int fw_num_msc; > + > +static void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg) > +{ > + /* TODO: wake up tasks blocked on this MSC's PCC channel */ > +} > + > +static void mpam_msc_drv_remove(struct platform_device *pdev) > +{ > + struct mpam_msc *msc = platform_get_drvdata(pdev); > + > + if (!msc) > + return; > + > + mutex_lock(&mpam_list_lock); > + mpam_num_msc--; > + platform_set_drvdata(pdev, NULL); > + list_del_rcu(&msc->glbl_list); > + synchronize_srcu(&mpam_srcu); > + devm_kfree(&pdev->dev, msc); This should happen automagically. > + mutex_unlock(&mpam_list_lock); > +} > + > +static int mpam_msc_drv_probe(struct platform_device *pdev) > +{ > + int err; > + struct mpam_msc *msc; > + struct resource *msc_res; > + void *plat_data = pdev->dev.platform_data; > + > + mutex_lock(&mpam_list_lock); > + do { > + msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); > + if (!msc) { > + err = -ENOMEM; > + break; > + } > + > + mutex_init(&msc->probe_lock); > + mutex_init(&msc->part_sel_lock); > + mutex_init(&msc->outer_mon_sel_lock); > + raw_spin_lock_init(&msc->inner_mon_sel_lock); > + msc->id = mpam_num_msc++; Multiple probe functions can run in parallel, so this needs to be atomic. Maybe it is with mpam_list_lock, but then the name of the mutex is misleading given this is not the list. It's not really clear to me what all needs the mutex here. Certainly a lot of it doesn't. Like everything else above here except the increment. > + msc->pdev = pdev; > + INIT_LIST_HEAD_RCU(&msc->glbl_list); > + INIT_LIST_HEAD_RCU(&msc->ris); > + > + err = update_msc_accessibility(msc); > + if (err) > + break; > + if (cpumask_empty(&msc->accessibility)) { > + pr_err_once("msc:%u is not accessible from any CPU!", > + msc->id); > + err = -EINVAL; > + break; > + } > + > + if (device_property_read_u32(&pdev->dev, "pcc-channel", Does this property apply to DT? It would as the code is written. It is not documented though. > + &msc->pcc_subspace_id)) > + msc->iface = MPAM_IFACE_MMIO; > + else > + msc->iface = MPAM_IFACE_PCC; > + > + if (msc->iface == MPAM_IFACE_MMIO) { > + void __iomem *io; > + > + io = devm_platform_get_and_ioremap_resource(pdev, 0, > + &msc_res); > + if (IS_ERR(io)) { > + pr_err("Failed to map MSC base address\n"); > + err = PTR_ERR(io); > + break; > + } > + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; > + msc->mapped_hwpage = io; > + } else if (msc->iface == MPAM_IFACE_PCC) { > + msc->pcc_cl.dev = &pdev->dev; > + msc->pcc_cl.rx_callback = mpam_pcc_rx_callback; > + msc->pcc_cl.tx_block = false; > + msc->pcc_cl.tx_tout = 1000; /* 1s */ > + msc->pcc_cl.knows_txdone = false; > + > + msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl, > + msc->pcc_subspace_id); > + if (IS_ERR(msc->pcc_chan)) { > + pr_err("Failed to request MSC PCC channel\n"); > + err = PTR_ERR(msc->pcc_chan); > + break; > + } > + } > + > + list_add_rcu(&msc->glbl_list, &mpam_all_msc); > + platform_set_drvdata(pdev, msc); > + } while (0); > + mutex_unlock(&mpam_list_lock); > + > + if (!err) { > + /* Create RIS entries described by firmware */ > + if (!acpi_disabled) > + err = acpi_mpam_parse_resources(msc, plat_data); > + else > + err = mpam_dt_parse_resources(msc, plat_data); Isn't there a race here if an error occurs since you already added the MSC to the list? Something like this sequence with 2 MSCs: device 1 probe device 1 added device 2 probe device 2 added device 1 calls mpam_discovery_complete() device 2 error on parse_resources device 2 removed > + } > + > + if (!err && fw_num_msc == mpam_num_msc) > + mpam_discovery_complete(); > + > + if (err && msc) > + mpam_msc_drv_remove(pdev); > + > + return err; > +} > + > +static const struct of_device_id mpam_of_match[] = { > + { .compatible = "arm,mpam-msc", }, > + {}, > +}; > +MODULE_DEVICE_TABLE(of, mpam_of_match); > + > +static struct platform_driver mpam_msc_driver = { > + .driver = { > + .name = "mpam_msc", > + .of_match_table = of_match_ptr(mpam_of_match), > + }, > + .probe = mpam_msc_drv_probe, > + .remove = mpam_msc_drv_remove, > +}; > + > +/* > + * MSC that are hidden under caches are not created as platform devices > + * as there is no cache driver. Caches are also special-cased in > + * update_msc_accessibility(). > + */ > +static void mpam_dt_create_foundling_msc(void) > +{ > + int err; > + struct device_node *cache; > + > + for_each_compatible_node(cache, NULL, "cache") { > + err = of_platform_populate(cache, mpam_of_match, NULL, NULL); This is going to create platform devices for all caches (except L1) regardless of whether they support MPAM or not. Isn't it likely or possible that only L3 or SLC caches support MPAM? > + if (err) > + pr_err("Failed to create MSC devices under caches\n"); > + } > +} > + > +static int __init mpam_msc_driver_init(void) > +{ > + if (!system_supports_mpam()) > + return -EOPNOTSUPP; > + > + init_srcu_struct(&mpam_srcu); > + > + if (!acpi_disabled) > + fw_num_msc = acpi_mpam_count_msc(); > + else > + fw_num_msc = mpam_dt_count_msc(); > + > + if (fw_num_msc <= 0) { > + pr_err("No MSC devices found in firmware\n"); > + return -EINVAL; > + } > + > + if (acpi_disabled) > + mpam_dt_create_foundling_msc(); > + > + return platform_driver_register(&mpam_msc_driver); > +} > +subsys_initcall(mpam_msc_driver_init); > diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h > new file mode 100644 > index 000000000000..07e0f240eaca > --- /dev/null > +++ b/drivers/resctrl/mpam_internal.h > @@ -0,0 +1,62 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +// Copyright (C) 2024 Arm Ltd. It's 2025. > + > +#ifndef MPAM_INTERNAL_H > +#define MPAM_INTERNAL_H > + > +#include <linux/arm_mpam.h> > +#include <linux/cpumask.h> > +#include <linux/io.h> > +#include <linux/mailbox_client.h> > +#include <linux/mutex.h> > +#include <linux/resctrl.h> > +#include <linux/sizes.h> > + > +struct mpam_msc { > + /* member of mpam_all_msc */ > + struct list_head glbl_list; > + > + int id; > + struct platform_device *pdev; > + > + /* Not modified after mpam_is_enabled() becomes true */ > + enum mpam_msc_iface iface; > + u32 pcc_subspace_id; > + struct mbox_client pcc_cl; > + struct pcc_mbox_chan *pcc_chan; > + u32 nrdy_usec; > + cpumask_t accessibility; > + > + /* > + * probe_lock is only take during discovery. After discovery these s/take/taken/ > + * properties become read-only and the lists are protected by SRCU. > + */ > + struct mutex probe_lock; > + unsigned long ris_idxs[128 / BITS_PER_LONG]; > + u32 ris_max; > + > + /* mpam_msc_ris of this component */ > + struct list_head ris; > + > + /* > + * part_sel_lock protects access to the MSC hardware registers that are > + * affected by MPAMCFG_PART_SEL. (including the ID registers that vary > + * by RIS). > + * If needed, take msc->lock first. Stale comment? I don't see any 'lock' member. > + */ > + struct mutex part_sel_lock; > + > + /* > + * mon_sel_lock protects access to the MSC hardware registers that are > + * affeted by MPAMCFG_MON_SEL. > + * If needed, take msc->lock first. > + */ > + struct mutex outer_mon_sel_lock; > + raw_spinlock_t inner_mon_sel_lock; > + unsigned long inner_mon_sel_flags; > + > + void __iomem *mapped_hwpage; > + size_t mapped_hwpage_sz; > +}; > + > +#endif /* MPAM_INTERNAL_H */ > -- > 2.20.1 >
Hi Rob, On 27/08/2025 16:39, Rob Herring wrote: > On Fri, Aug 22, 2025 at 10:32 AM James Morse <james.morse@arm.com> wrote: >> >> Probing MPAM is convoluted. MSCs that are integrated with a CPU may >> only be accessible from those CPUs, and they may not be online. >> Touching the hardware early is pointless as MPAM can't be used until >> the system-wide common values for num_partid and num_pmg have been >> discovered. >> >> Start with driver probe/remove and mapping the MSC. >> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c >> new file mode 100644 >> index 000000000000..a0d9a699a6e7 >> --- /dev/null >> +++ b/drivers/resctrl/mpam_devices.c >> @@ -0,0 +1,336 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +// Copyright (C) 2025 Arm Ltd. > > Given the 2024 below, should this be 2024-2025? I've never known what this year is really for. People clearly expect it to be this year ... I've evidently missed one somewhere. I'll fix that. ... I wrote some of this code in 2018, so there are a range of options on what it 'should' be ... >> + >> +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ >> + >> +#include <linux/acpi.h> >> +#include <linux/arm_mpam.h> >> +#include <linux/cacheinfo.h> >> +#include <linux/cpu.h> >> +#include <linux/cpumask.h> >> +#include <linux/device.h> >> +#include <linux/errno.h> >> +#include <linux/gfp.h> >> +#include <linux/list.h> >> +#include <linux/lockdep.h> >> +#include <linux/mutex.h> >> +#include <linux/of.h> >> +#include <linux/of_platform.h> >> +#include <linux/platform_device.h> >> +#include <linux/printk.h> >> +#include <linux/slab.h> >> +#include <linux/spinlock.h> >> +#include <linux/srcu.h> >> +#include <linux/types.h> >> + >> +#include <acpi/pcc.h> >> + >> +#include "mpam_internal.h" >> + >> +/* >> + * mpam_list_lock protects the SRCU lists when writing. Once the >> + * mpam_enabled key is enabled these lists are read-only, >> + * unless the error interrupt disables the driver. >> + */ >> +static DEFINE_MUTEX(mpam_list_lock); >> +static LIST_HEAD(mpam_all_msc); >> + >> +static struct srcu_struct mpam_srcu; >> + >> +/* MPAM isn't available until all the MSC have been probed. */ >> +static u32 mpam_num_msc; >> + >> +static void mpam_discovery_complete(void) >> +{ >> + pr_err("Discovered all MSC\n"); > Perhaps print out how many MSCs. Once the whole thing is assembled the mpam_enable() call prints the number of PARTID/PMG, which is something user-space can do something with. I don't think the number of MSC is useful to anyone as some of them are grouped together and can't be configured independently. >> +} >> +static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np, >> + u32 ris_idx) >> +{ >> + int err = 0; >> + u32 level = 0; >> + unsigned long cache_id; >> + struct device_node *cache; >> + >> + do { >> + if (of_device_is_compatible(np, "arm,mpam-cache")) { >> + cache = of_parse_phandle(np, "arm,mpam-device", 0); >> + if (!cache) { >> + pr_err("Failed to read phandle\n"); >> + break; >> + } >> + } else if (of_device_is_compatible(np->parent, "cache")) { > > Don't access device_node members. I'm trying to make it opaque. And > technically it can be racy to access parent ptr when/if nodes are > dynamic. I think this should suffice: > > else { > cache = of_get_parent(np); > if (!of_device_is_compatible(cache, "cache")) { > cache = NULL; > break; > } > } Thanks! The if/else-if ladder needs to stay, I've grabbed the parent earlier, and added a of_node_put() of it after the do/while. >> + cache = of_node_get(np->parent); >> + } else { >> + /* For now, only caches are supported */ >> + cache = NULL; >> + break; >> + } >> + >> + err = of_property_read_u32(cache, "cache-level", &level); >> + if (err) { >> + pr_err("Failed to read cache-level\n"); >> + break; >> + } >> + >> + cache_id = cache_of_calculate_id(cache); >> + if (cache_id == ~0UL) { >> + err = -ENOENT; >> + break; >> + } >> + >> + err = mpam_ris_create(msc, ris_idx, MPAM_CLASS_CACHE, level, >> + cache_id); >> + } while (0); >> + of_node_put(cache); >> + >> + return err; >> +} >> + >> +static int mpam_dt_parse_resources(struct mpam_msc *msc, void *ignored) >> +{ >> + int err, num_ris = 0; >> + const u32 *ris_idx_p; >> + struct device_node *iter, *np; >> + >> + np = msc->pdev->dev.of_node; >> + for_each_child_of_node(np, iter) { > > Use for_each_available_child_of_node_scoped() Sure, >> + ris_idx_p = of_get_property(iter, "reg", NULL); > > This is broken on big endian and new users of of_get_property() are > discouraged. Use of_property_read_reg(). Done, >> + if (ris_idx_p) { >> + num_ris++; >> + err = mpam_dt_parse_resource(msc, iter, *ris_idx_p); >> + if (err) { >> + of_node_put(iter); > > And then drop the put. > >> + return err; >> + } >> + } >> + } >> + >> + if (!num_ris) >> + mpam_dt_parse_resource(msc, np, 0); >> + >> + return err; >> +} >> +/* >> + * An MSC can control traffic from a set of CPUs, but may only be accessible >> + * from a (hopefully wider) set of CPUs. The common reason for this is power >> + * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the >> + * the corresponding cache may also be powered off. By making accesses from >> + * one of those CPUs, we ensure this isn't the case. >> + */ >> +static int update_msc_accessibility(struct mpam_msc *msc) >> +{ >> + struct device_node *parent; >> + u32 affinity_id; >> + int err; >> + >> + if (!acpi_disabled) { >> + err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity", >> + &affinity_id); >> + if (err) >> + cpumask_copy(&msc->accessibility, cpu_possible_mask); >> + else >> + acpi_pptt_get_cpus_from_container(affinity_id, >> + &msc->accessibility); >> + >> + return 0; >> + } >> + >> + /* This depends on the path to of_node */ > I'm failing to understand what has to be at the root node? The accessibility bitmap. If the MSC is at the root of the tree - its assumed to be global. If its buried in a device, its assumed to be in the same power domain as that device. Practically this only matters for caches where PSCI's CPU_SUSPEND can turn the cache off, so the cache MSC can only be accessed from the CPU's local to it, that way we know its not about to be turned off by PSCI. I'll rephrase the comment - its trying to explain why its not explicitly encoded. | /* Where an MSC can be accessed from depends on the path to of_node. */ >> + parent = of_get_parent(msc->pdev->dev.of_node); >> + if (parent == of_root) { >> + cpumask_copy(&msc->accessibility, cpu_possible_mask); >> + err = 0; >> + } else { >> + err = -EINVAL; >> + pr_err("Cannot determine accessibility of MSC: %s\n", >> + dev_name(&msc->pdev->dev)); >> + } >> + of_node_put(parent); >> + >> + return err; >> +} >> +static void mpam_msc_drv_remove(struct platform_device *pdev) >> +{ >> + struct mpam_msc *msc = platform_get_drvdata(pdev); >> + >> + if (!msc) >> + return; >> + >> + mutex_lock(&mpam_list_lock); >> + mpam_num_msc--; >> + platform_set_drvdata(pdev, NULL); >> + list_del_rcu(&msc->glbl_list); >> + synchronize_srcu(&mpam_srcu); >> + devm_kfree(&pdev->dev, msc); > > This should happen automagically. > >> + mutex_unlock(&mpam_list_lock); >> +} >> + >> +static int mpam_msc_drv_probe(struct platform_device *pdev) >> +{ >> + int err; >> + struct mpam_msc *msc; >> + struct resource *msc_res; >> + void *plat_data = pdev->dev.platform_data; >> + >> + mutex_lock(&mpam_list_lock); >> + do { >> + msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); >> + if (!msc) { >> + err = -ENOMEM; >> + break; >> + } >> + >> + mutex_init(&msc->probe_lock); >> + mutex_init(&msc->part_sel_lock); >> + mutex_init(&msc->outer_mon_sel_lock); >> + raw_spin_lock_init(&msc->inner_mon_sel_lock); >> + msc->id = mpam_num_msc++; > > Multiple probe functions can run in parallel, so this needs to be > atomic. Maybe it is with mpam_list_lock, but then the name of the > mutex is misleading given this is not the list. It's not really clear > to me what all needs the mutex here. Certainly a lot of it doesn't. > Like everything else above here except the increment. It's more about the class/component/device lists that get added later - but more on this mpam_num_msc thing below... >> + msc->pdev = pdev; >> + INIT_LIST_HEAD_RCU(&msc->glbl_list); >> + INIT_LIST_HEAD_RCU(&msc->ris); >> + >> + err = update_msc_accessibility(msc); >> + if (err) >> + break; >> + if (cpumask_empty(&msc->accessibility)) { >> + pr_err_once("msc:%u is not accessible from any CPU!", >> + msc->id); >> + err = -EINVAL; >> + break; >> + } >> + >> + if (device_property_read_u32(&pdev->dev, "pcc-channel", > Does this property apply to DT? It would as the code is written. It is > not documented though. I don't think so - on DT PCC is going to be spelled SCMI which somes with some kind of discovery instead. This property is added by the ACPI table 'driver'. >> + &msc->pcc_subspace_id)) >> + msc->iface = MPAM_IFACE_MMIO; >> + else >> + msc->iface = MPAM_IFACE_PCC; >> + >> + if (msc->iface == MPAM_IFACE_MMIO) { >> + void __iomem *io; >> + >> + io = devm_platform_get_and_ioremap_resource(pdev, 0, >> + &msc_res); >> + if (IS_ERR(io)) { >> + pr_err("Failed to map MSC base address\n"); >> + err = PTR_ERR(io); >> + break; >> + } >> + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; >> + msc->mapped_hwpage = io; >> + } else if (msc->iface == MPAM_IFACE_PCC) { >> + msc->pcc_cl.dev = &pdev->dev; >> + msc->pcc_cl.rx_callback = mpam_pcc_rx_callback; >> + msc->pcc_cl.tx_block = false; >> + msc->pcc_cl.tx_tout = 1000; /* 1s */ >> + msc->pcc_cl.knows_txdone = false; >> + >> + msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl, >> + msc->pcc_subspace_id); >> + if (IS_ERR(msc->pcc_chan)) { >> + pr_err("Failed to request MSC PCC channel\n"); >> + err = PTR_ERR(msc->pcc_chan); >> + break; >> + } >> + } >> + >> + list_add_rcu(&msc->glbl_list, &mpam_all_msc); >> + platform_set_drvdata(pdev, msc); >> + } while (0); >> + mutex_unlock(&mpam_list_lock); >> + >> + if (!err) { >> + /* Create RIS entries described by firmware */ >> + if (!acpi_disabled) >> + err = acpi_mpam_parse_resources(msc, plat_data); >> + else >> + err = mpam_dt_parse_resources(msc, plat_data); > > Isn't there a race here if an error occurs since you already added the > MSC to the list? Something like this sequence with 2 MSCs: > > device 1 probe > device 1 added > device 2 probe > device 2 added > device 1 calls mpam_discovery_complete() > device 2 error on parse_resources > device 2 removed By the time the whole thing is assembled the 'discovery complete' work is scheduled by a cpuhp callback and has better protection against this. That discovery-complete call is just to help illustrate that there is stuff that happens once all the MSC have been discovered, as that code gets much more complicated later in teh series. Combined with your comment about the msc_id increment above - I'll stop using that as both a count and an id, re-using the pdev->id as its more likely to be stable from boot to boot. mpam_num_msc can then become an atomic_t that is incremented once we know we're not going to remove the MSC from the list. >> + } >> + >> + if (!err && fw_num_msc == mpam_num_msc) >> + mpam_discovery_complete(); >> + >> + if (err && msc) >> + mpam_msc_drv_remove(pdev); >> + >> + return err; >> +} >> +/* >> + * MSC that are hidden under caches are not created as platform devices >> + * as there is no cache driver. Caches are also special-cased in >> + * update_msc_accessibility(). >> + */ >> +static void mpam_dt_create_foundling_msc(void) >> +{ >> + int err; >> + struct device_node *cache; >> + >> + for_each_compatible_node(cache, NULL, "cache") { >> + err = of_platform_populate(cache, mpam_of_match, NULL, NULL); > > This is going to create platform devices for all caches (except L1) > regardless of whether they support MPAM or not. Isn't it likely or > possible that only L3 or SLC caches support MPAM? Likely - but all things are possible. You could put an MPAM MSC in your L1-I cache. (or even in the CPU - but lets not go there) I'll attempt to fix that up, what I have doesn't quite work, but I'll keep picking at it: | for_each_compatible_node(cache, NULL, "cache") { | struct device_node *cache_device; | | if (of_node_check_flag(cache, OF_POPULATED)) | continue; | | cache_device = of_find_matching_node_and_match(cache, mpam_of_match, NULL); | if (!cache_device) | continue; | of_node_put(cache_device); | | pdev = of_platform_device_create(cache, "cache", NULL); | if (!pdev) | pr_err_once("Failed to create MSC devices under caches\n"); | } >> + if (err) >> + pr_err("Failed to create MSC devices under caches\n"); >> + } >> +} >> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h >> new file mode 100644 >> index 000000000000..07e0f240eaca >> --- /dev/null >> +++ b/drivers/resctrl/mpam_internal.h >> @@ -0,0 +1,62 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +// Copyright (C) 2024 Arm Ltd. > > It's 2025. ... and I wrote this in 2018, but missed the annual update. I'll fix it. >> + >> +struct mpam_msc { >> + /* member of mpam_all_msc */ >> + struct list_head glbl_list; >> + >> + int id; >> + struct platform_device *pdev; >> + >> + /* Not modified after mpam_is_enabled() becomes true */ >> + enum mpam_msc_iface iface; >> + u32 pcc_subspace_id; >> + struct mbox_client pcc_cl; >> + struct pcc_mbox_chan *pcc_chan; >> + u32 nrdy_usec; >> + cpumask_t accessibility; >> + >> + /* >> + * probe_lock is only take during discovery. After discovery these > s/take/taken/ Fixed, >> + * properties become read-only and the lists are protected by SRCU. >> + */ >> + struct mutex probe_lock; >> + unsigned long ris_idxs[128 / BITS_PER_LONG]; >> + u32 ris_max; >> + >> + /* mpam_msc_ris of this component */ >> + struct list_head ris; >> + >> + /* >> + * part_sel_lock protects access to the MSC hardware registers that are >> + * affected by MPAMCFG_PART_SEL. (including the ID registers that vary >> + * by RIS). >> + * If needed, take msc->lock first. > Stale comment? I don't see any 'lock' member. Yes, it should say probe_lock .. renamed after more locks got added in here, and the comment got fixed up in the wrong patch. Fixed. >> + */ >> + struct mutex part_sel_lock; >> + >> + /* >> + * mon_sel_lock protects access to the MSC hardware registers that are >> + * affeted by MPAMCFG_MON_SEL. >> + * If needed, take msc->lock first. >> + */ >> + struct mutex outer_mon_sel_lock; >> + raw_spinlock_t inner_mon_sel_lock; >> + unsigned long inner_mon_sel_flags; >> + >> + void __iomem *mapped_hwpage; >> + size_t mapped_hwpage_sz; >> +}; >> + >> +#endif /* MPAM_INTERNAL_H */ >> -- >> 2.20.1 >> Thanks, James
On Wed, Aug 27, 2025 at 10:39 AM Rob Herring <robh@kernel.org> wrote: > > On Fri, Aug 22, 2025 at 10:32 AM James Morse <james.morse@arm.com> wrote: > > > > Probing MPAM is convoluted. MSCs that are integrated with a CPU may > > only be accessible from those CPUs, and they may not be online. > > Touching the hardware early is pointless as MPAM can't be used until > > the system-wide common values for num_partid and num_pmg have been > > discovered. [...] > > +static int mpam_dt_parse_resources(struct mpam_msc *msc, void *ignored) > > +{ > > + int err, num_ris = 0; > > + const u32 *ris_idx_p; > > + struct device_node *iter, *np; > > + > > + np = msc->pdev->dev.of_node; > > + for_each_child_of_node(np, iter) { > > Use for_each_available_child_of_node_scoped() > > > + ris_idx_p = of_get_property(iter, "reg", NULL); > > This is broken on big endian and new users of of_get_property() are > discouraged. Use of_property_read_reg(). Err, this is broken on little endian as the DT is big endian. So this was obviously not tested as I'm confident you didn't test on BE. Rob
Hi Rob, On 27/08/2025 17:16, Rob Herring wrote: > On Wed, Aug 27, 2025 at 10:39 AM Rob Herring <robh@kernel.org> wrote: >> >> On Fri, Aug 22, 2025 at 10:32 AM James Morse <james.morse@arm.com> wrote: >>> >>> Probing MPAM is convoluted. MSCs that are integrated with a CPU may >>> only be accessible from those CPUs, and they may not be online. >>> Touching the hardware early is pointless as MPAM can't be used until >>> the system-wide common values for num_partid and num_pmg have been >>> discovered. > > [...] > >>> +static int mpam_dt_parse_resources(struct mpam_msc *msc, void *ignored) >>> +{ >>> + int err, num_ris = 0; >>> + const u32 *ris_idx_p; >>> + struct device_node *iter, *np; >>> + >>> + np = msc->pdev->dev.of_node; >>> + for_each_child_of_node(np, iter) { >> >> Use for_each_available_child_of_node_scoped() >> >>> + ris_idx_p = of_get_property(iter, "reg", NULL); >> >> This is broken on big endian and new users of of_get_property() are >> discouraged. Use of_property_read_reg(). > > Err, this is broken on little endian as the DT is big endian. > > So this was obviously not tested as I'm confident you didn't test on BE. 'not tested' is shades of grey. I fed the FVP ~6 different DTB files to hit the different paths through the driver. The FVP only has controls under RIS-0, so all of those only defined RIS-0, and unsurprisingly didn't notice the helper isn't endian safe. Thanks, James
Hi James, On 8/22/25 16:29, James Morse wrote: > Probing MPAM is convoluted. MSCs that are integrated with a CPU may > only be accessible from those CPUs, and they may not be online. > Touching the hardware early is pointless as MPAM can't be used until > the system-wide common values for num_partid and num_pmg have been > discovered. > > Start with driver probe/remove and mapping the MSC. > > CC: Carl Worth <carl@os.amperecomputing.com> > Signed-off-by: James Morse <james.morse@arm.com> > --- > Changes since RFC: > * Check for status=broken DT devices. > * Moved all the files around. > * Made Kconfig symbols depend on EXPERT > --- > arch/arm64/Kconfig | 1 + > drivers/Kconfig | 2 + > drivers/Makefile | 1 + > drivers/resctrl/Kconfig | 11 ++ > drivers/resctrl/Makefile | 4 + > drivers/resctrl/mpam_devices.c | 336 ++++++++++++++++++++++++++++++++ > drivers/resctrl/mpam_internal.h | 62 ++++++ > 7 files changed, 417 insertions(+) > create mode 100644 drivers/resctrl/Kconfig > create mode 100644 drivers/resctrl/Makefile > create mode 100644 drivers/resctrl/mpam_devices.c > create mode 100644 drivers/resctrl/mpam_internal.h > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index e51ccf1da102..ea3c54e04275 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -2062,6 +2062,7 @@ config ARM64_TLB_RANGE > > config ARM64_MPAM > bool "Enable support for MPAM" > + select ARM64_MPAM_DRIVER > select ACPI_MPAM if ACPI > help > Memory Partitioning and Monitoring is an optional extension > diff --git a/drivers/Kconfig b/drivers/Kconfig > index 4915a63866b0..3054b50a2f4c 100644 > --- a/drivers/Kconfig > +++ b/drivers/Kconfig > @@ -251,4 +251,6 @@ source "drivers/hte/Kconfig" > > source "drivers/cdx/Kconfig" > > +source "drivers/resctrl/Kconfig" > + > endmenu > diff --git a/drivers/Makefile b/drivers/Makefile > index b5749cf67044..f41cf4eddeba 100644 > --- a/drivers/Makefile > +++ b/drivers/Makefile > @@ -194,5 +194,6 @@ obj-$(CONFIG_HTE) += hte/ > obj-$(CONFIG_DRM_ACCEL) += accel/ > obj-$(CONFIG_CDX_BUS) += cdx/ > obj-$(CONFIG_DPLL) += dpll/ > +obj-y += resctrl/ > > obj-$(CONFIG_S390) += s390/ > diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig > new file mode 100644 > index 000000000000..dff7b87280ab > --- /dev/null > +++ b/drivers/resctrl/Kconfig > @@ -0,0 +1,11 @@ > +# Confusingly, this is everything but the CPU bits of MPAM. CPU here means > +# CPU resources, not containers or cgroups etc. > +config ARM64_MPAM_DRIVER > + bool "MPAM driver for System IP, e,g. caches and memory controllers" > + depends on ARM64_MPAM && EXPERT > + > +config ARM64_MPAM_DRIVER_DEBUG > + bool "Enable debug messages from the MPAM driver." > + depends on ARM64_MPAM_DRIVER > + help > + Say yes here to enable debug messages from the MPAM driver. > diff --git a/drivers/resctrl/Makefile b/drivers/resctrl/Makefile > new file mode 100644 > index 000000000000..92b48fa20108 > --- /dev/null > +++ b/drivers/resctrl/Makefile > @@ -0,0 +1,4 @@ > +obj-$(CONFIG_ARM64_MPAM_DRIVER) += mpam.o > +mpam-y += mpam_devices.o > + > +cflags-$(CONFIG_ARM64_MPAM_DRIVER_DEBUG) += -DDEBUG > diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c > new file mode 100644 > index 000000000000..a0d9a699a6e7 > --- /dev/null > +++ b/drivers/resctrl/mpam_devices.c > @@ -0,0 +1,336 @@ > +// SPDX-License-Identifier: GPL-2.0 > +// Copyright (C) 2025 Arm Ltd. > + > +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ > + > +#include <linux/acpi.h> > +#include <linux/arm_mpam.h> > +#include <linux/cacheinfo.h> > +#include <linux/cpu.h> > +#include <linux/cpumask.h> > +#include <linux/device.h> > +#include <linux/errno.h> > +#include <linux/gfp.h> > +#include <linux/list.h> > +#include <linux/lockdep.h> > +#include <linux/mutex.h> > +#include <linux/of.h> > +#include <linux/of_platform.h> > +#include <linux/platform_device.h> > +#include <linux/printk.h> > +#include <linux/slab.h> > +#include <linux/spinlock.h> > +#include <linux/srcu.h> > +#include <linux/types.h> > + > +#include <acpi/pcc.h> > + > +#include "mpam_internal.h" > + > +/* > + * mpam_list_lock protects the SRCU lists when writing. Once the > + * mpam_enabled key is enabled these lists are read-only, > + * unless the error interrupt disables the driver. > + */ > +static DEFINE_MUTEX(mpam_list_lock); > +static LIST_HEAD(mpam_all_msc); > + > +static struct srcu_struct mpam_srcu; > + > +/* MPAM isn't available until all the MSC have been probed. */ > +static u32 mpam_num_msc; > + > +static void mpam_discovery_complete(void) > +{ > + pr_err("Discovered all MSC\n"); > +} > + > +static int mpam_dt_count_msc(void) > +{ > + int count = 0; > + struct device_node *np; > + > + for_each_compatible_node(np, NULL, "arm,mpam-msc") { > + if (of_device_is_available(np)) > + count++; > + } > + > + return count; > +} > + > +static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np, > + u32 ris_idx) > +{ > + int err = 0; > + u32 level = 0; > + unsigned long cache_id; > + struct device_node *cache; > + > + do { > + if (of_device_is_compatible(np, "arm,mpam-cache")) { > + cache = of_parse_phandle(np, "arm,mpam-device", 0); > + if (!cache) { > + pr_err("Failed to read phandle\n"); > + break; > + } This looks like this allows "arm,mpam-cache" and "arm,mpam-device" to be used on an msc node when there are no ris children. This usage could be reasonable but doesn't match the schema in the previous patch. Should this usage be rejected or the schema extended? > + } else if (of_device_is_compatible(np->parent, "cache")) { > + cache = of_node_get(np->parent); > + } else { > + /* For now, only caches are supported */ > + cache = NULL; > + break; > + } > + > + err = of_property_read_u32(cache, "cache-level", &level); > + if (err) { > + pr_err("Failed to read cache-level\n"); > + break; > + } > + > + cache_id = cache_of_calculate_id(cache); > + if (cache_id == ~0UL) { > + err = -ENOENT; > + break; > + } > + > + err = mpam_ris_create(msc, ris_idx, MPAM_CLASS_CACHE, level, > + cache_id); > + } while (0); > + of_node_put(cache); > + > + return err; > +} > + > +static int mpam_dt_parse_resources(struct mpam_msc *msc, void *ignored) > +{ > + int err, num_ris = 0; > + const u32 *ris_idx_p; > + struct device_node *iter, *np; > + > + np = msc->pdev->dev.of_node; > + for_each_child_of_node(np, iter) { > + ris_idx_p = of_get_property(iter, "reg", NULL); > + if (ris_idx_p) { > + num_ris++; > + err = mpam_dt_parse_resource(msc, iter, *ris_idx_p); > + if (err) { > + of_node_put(iter); > + return err; > + } > + } > + } > + > + if (!num_ris) > + mpam_dt_parse_resource(msc, np, 0); > + > + return err; > +} > + > +/* > + * An MSC can control traffic from a set of CPUs, but may only be accessible > + * from a (hopefully wider) set of CPUs. The common reason for this is power > + * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the > + * the corresponding cache may also be powered off. By making accesses from > + * one of those CPUs, we ensure this isn't the case. > + */ > +static int update_msc_accessibility(struct mpam_msc *msc) > +{ > + struct device_node *parent; > + u32 affinity_id; > + int err; > + > + if (!acpi_disabled) { > + err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity", > + &affinity_id); > + if (err) > + cpumask_copy(&msc->accessibility, cpu_possible_mask); > + else > + acpi_pptt_get_cpus_from_container(affinity_id, > + &msc->accessibility); > + > + return 0; > + } > + > + /* This depends on the path to of_node */ > + parent = of_get_parent(msc->pdev->dev.of_node); > + if (parent == of_root) { > + cpumask_copy(&msc->accessibility, cpu_possible_mask); > + err = 0; > + } else { > + err = -EINVAL; > + pr_err("Cannot determine accessibility of MSC: %s\n", > + dev_name(&msc->pdev->dev)); > + } > + of_node_put(parent); > + > + return err; > +} > + > +static int fw_num_msc; > + > +static void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg) > +{ > + /* TODO: wake up tasks blocked on this MSC's PCC channel */ > +} > + > +static void mpam_msc_drv_remove(struct platform_device *pdev) > +{ > + struct mpam_msc *msc = platform_get_drvdata(pdev); > + > + if (!msc) > + return; > + > + mutex_lock(&mpam_list_lock); > + mpam_num_msc--; > + platform_set_drvdata(pdev, NULL); > + list_del_rcu(&msc->glbl_list); > + synchronize_srcu(&mpam_srcu); > + devm_kfree(&pdev->dev, msc); > + mutex_unlock(&mpam_list_lock); > +} > + > +static int mpam_msc_drv_probe(struct platform_device *pdev) > +{ > + int err; > + struct mpam_msc *msc; > + struct resource *msc_res; > + void *plat_data = pdev->dev.platform_data; > + > + mutex_lock(&mpam_list_lock); > + do { > + msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); > + if (!msc) { > + err = -ENOMEM; > + break; > + } > + > + mutex_init(&msc->probe_lock); > + mutex_init(&msc->part_sel_lock); > + mutex_init(&msc->outer_mon_sel_lock); > + raw_spin_lock_init(&msc->inner_mon_sel_lock); > + msc->id = mpam_num_msc++; > + msc->pdev = pdev; > + INIT_LIST_HEAD_RCU(&msc->glbl_list); > + INIT_LIST_HEAD_RCU(&msc->ris); > + > + err = update_msc_accessibility(msc); > + if (err) > + break; > + if (cpumask_empty(&msc->accessibility)) { > + pr_err_once("msc:%u is not accessible from any CPU!", > + msc->id); > + err = -EINVAL; > + break; > + } > + > + if (device_property_read_u32(&pdev->dev, "pcc-channel", > + &msc->pcc_subspace_id)) > + msc->iface = MPAM_IFACE_MMIO; > + else > + msc->iface = MPAM_IFACE_PCC; > + > + if (msc->iface == MPAM_IFACE_MMIO) { > + void __iomem *io; > + > + io = devm_platform_get_and_ioremap_resource(pdev, 0, > + &msc_res); > + if (IS_ERR(io)) { > + pr_err("Failed to map MSC base address\n"); > + err = PTR_ERR(io); > + break; > + } > + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; > + msc->mapped_hwpage = io; > + } else if (msc->iface == MPAM_IFACE_PCC) { > + msc->pcc_cl.dev = &pdev->dev; > + msc->pcc_cl.rx_callback = mpam_pcc_rx_callback; > + msc->pcc_cl.tx_block = false; > + msc->pcc_cl.tx_tout = 1000; /* 1s */ > + msc->pcc_cl.knows_txdone = false; > + > + msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl, > + msc->pcc_subspace_id); > + if (IS_ERR(msc->pcc_chan)) { > + pr_err("Failed to request MSC PCC channel\n"); > + err = PTR_ERR(msc->pcc_chan); > + break; > + } I don't see pcc support added in this series. Should we fail the probe if this interface is specified? (If keeping, there is a missing pcc_mbox_free_channel() on the error path.) > + } > + > + list_add_rcu(&msc->glbl_list, &mpam_all_msc); > + platform_set_drvdata(pdev, msc); > + } while (0); > + mutex_unlock(&mpam_list_lock); > + > + if (!err) { > + /* Create RIS entries described by firmware */ > + if (!acpi_disabled) > + err = acpi_mpam_parse_resources(msc, plat_data); > + else > + err = mpam_dt_parse_resources(msc, plat_data); > + } > + > + if (!err && fw_num_msc == mpam_num_msc) > + mpam_discovery_complete(); > + > + if (err && msc) > + mpam_msc_drv_remove(pdev); > + > + return err; > +} > + > +static const struct of_device_id mpam_of_match[] = { > + { .compatible = "arm,mpam-msc", }, > + {}, > +}; > +MODULE_DEVICE_TABLE(of, mpam_of_match); > + > +static struct platform_driver mpam_msc_driver = { > + .driver = { > + .name = "mpam_msc", > + .of_match_table = of_match_ptr(mpam_of_match), > + }, > + .probe = mpam_msc_drv_probe, > + .remove = mpam_msc_drv_remove, > +}; > + > +/* > + * MSC that are hidden under caches are not created as platform devices > + * as there is no cache driver. Caches are also special-cased in > + * update_msc_accessibility(). > + */ > +static void mpam_dt_create_foundling_msc(void) > +{ > + int err; > + struct device_node *cache; > + > + for_each_compatible_node(cache, NULL, "cache") { > + err = of_platform_populate(cache, mpam_of_match, NULL, NULL); > + if (err) > + pr_err("Failed to create MSC devices under caches\n"); > + } > +} > + > +static int __init mpam_msc_driver_init(void) > +{ > + if (!system_supports_mpam()) > + return -EOPNOTSUPP; > + > + init_srcu_struct(&mpam_srcu); > + > + if (!acpi_disabled) > + fw_num_msc = acpi_mpam_count_msc(); > + else > + fw_num_msc = mpam_dt_count_msc(); > + > + if (fw_num_msc <= 0) { > + pr_err("No MSC devices found in firmware\n"); > + return -EINVAL; > + } > + > + if (acpi_disabled) > + mpam_dt_create_foundling_msc(); > + > + return platform_driver_register(&mpam_msc_driver); > +} > +subsys_initcall(mpam_msc_driver_init); > diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h > new file mode 100644 > index 000000000000..07e0f240eaca > --- /dev/null > +++ b/drivers/resctrl/mpam_internal.h > @@ -0,0 +1,62 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +// Copyright (C) 2024 Arm Ltd. > + > +#ifndef MPAM_INTERNAL_H > +#define MPAM_INTERNAL_H > + > +#include <linux/arm_mpam.h> > +#include <linux/cpumask.h> > +#include <linux/io.h> > +#include <linux/mailbox_client.h> > +#include <linux/mutex.h> > +#include <linux/resctrl.h> > +#include <linux/sizes.h> > + > +struct mpam_msc { > + /* member of mpam_all_msc */ > + struct list_head glbl_list; > + > + int id; > + struct platform_device *pdev; > + > + /* Not modified after mpam_is_enabled() becomes true */ > + enum mpam_msc_iface iface; > + u32 pcc_subspace_id; > + struct mbox_client pcc_cl; > + struct pcc_mbox_chan *pcc_chan; > + u32 nrdy_usec; > + cpumask_t accessibility; > + > + /* > + * probe_lock is only take during discovery. After discovery these nit: s/take/taken/ > + * properties become read-only and the lists are protected by SRCU. > + */ > + struct mutex probe_lock; > + unsigned long ris_idxs[128 / BITS_PER_LONG]; > + u32 ris_max; > + > + /* mpam_msc_ris of this component */ > + struct list_head ris; > + > + /* > + * part_sel_lock protects access to the MSC hardware registers that are > + * affected by MPAMCFG_PART_SEL. (including the ID registers that vary > + * by RIS). > + * If needed, take msc->lock first. > + */ > + struct mutex part_sel_lock; > + > + /* > + * mon_sel_lock protects access to the MSC hardware registers that are > + * affeted by MPAMCFG_MON_SEL. nit: s/affeted/affected/ > + * If needed, take msc->lock first. > + */ > + struct mutex outer_mon_sel_lock; > + raw_spinlock_t inner_mon_sel_lock; > + unsigned long inner_mon_sel_flags; > + > + void __iomem *mapped_hwpage; > + size_t mapped_hwpage_sz; > +}; > + > +#endif /* MPAM_INTERNAL_H */ Thanks, Ben
Hi Ben, On 27/08/2025 14:03, Ben Horgan wrote: > On 8/22/25 16:29, James Morse wrote: >> Probing MPAM is convoluted. MSCs that are integrated with a CPU may >> only be accessible from those CPUs, and they may not be online. >> Touching the hardware early is pointless as MPAM can't be used until >> the system-wide common values for num_partid and num_pmg have been >> discovered. >> >> Start with driver probe/remove and mapping the MSC. >> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c >> new file mode 100644 >> index 000000000000..a0d9a699a6e7 >> --- /dev/null >> +++ b/drivers/resctrl/mpam_devices.c >> @@ -0,0 +1,336 @@ >> +static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np, >> + u32 ris_idx) >> +{ >> + int err = 0; >> + u32 level = 0; >> + unsigned long cache_id; >> + struct device_node *cache; >> + >> + do { >> + if (of_device_is_compatible(np, "arm,mpam-cache")) { >> + cache = of_parse_phandle(np, "arm,mpam-device", 0); >> + if (!cache) { >> + pr_err("Failed to read phandle\n"); >> + break; >> + } > This looks like this allows "arm,mpam-cache" and "arm,mpam-device" to be > used on an msc node when there are no ris children. This usage could be > reasonable but doesn't match the schema in the previous patch. Should > this usage be rejected or the schema extended? The DT/ACPI stuff is only going to describe the things that make sense at a high level, e.g. the controls for the L3. There may be other controls for stuff that doesn't make sense in the hardware - these get discovered, grouped as 'unknown' and left alone. Another angle on this is where there is an MSC that the OS will never make use of, but needs to know about to find the system wide minimum value. (there is a comment about this in the ACPI spec...) I don't think its a problem if the magic dt-binding machinery is overly restrictive, that is about validating DTB files... >> + } else if (of_device_is_compatible(np->parent, "cache")) { >> + cache = of_node_get(np->parent); >> + } else { >> + /* For now, only caches are supported */ >> + cache = NULL; >> + break; >> + } >> + >> + err = of_property_read_u32(cache, "cache-level", &level); >> + if (err) { >> + pr_err("Failed to read cache-level\n"); >> + break; >> + } >> + >> + cache_id = cache_of_calculate_id(cache); >> + if (cache_id == ~0UL) { >> + err = -ENOENT; >> + break; >> + } >> + >> + err = mpam_ris_create(msc, ris_idx, MPAM_CLASS_CACHE, level, >> + cache_id); >> + } while (0); >> + of_node_put(cache); >> + >> + return err; >> +} >> +static int mpam_msc_drv_probe(struct platform_device *pdev) >> +{ >> + int err; >> + struct mpam_msc *msc; >> + struct resource *msc_res; >> + void *plat_data = pdev->dev.platform_data; >> + >> + mutex_lock(&mpam_list_lock); >> + do { >> + msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); >> + if (!msc) { >> + err = -ENOMEM; >> + break; >> + } >> + >> + mutex_init(&msc->probe_lock); >> + mutex_init(&msc->part_sel_lock); >> + mutex_init(&msc->outer_mon_sel_lock); >> + raw_spin_lock_init(&msc->inner_mon_sel_lock); >> + msc->id = mpam_num_msc++; >> + msc->pdev = pdev; >> + INIT_LIST_HEAD_RCU(&msc->glbl_list); >> + INIT_LIST_HEAD_RCU(&msc->ris); >> + >> + err = update_msc_accessibility(msc); >> + if (err) >> + break; >> + if (cpumask_empty(&msc->accessibility)) { >> + pr_err_once("msc:%u is not accessible from any CPU!", >> + msc->id); >> + err = -EINVAL; >> + break; >> + } >> + >> + if (device_property_read_u32(&pdev->dev, "pcc-channel", >> + &msc->pcc_subspace_id)) >> + msc->iface = MPAM_IFACE_MMIO; >> + else >> + msc->iface = MPAM_IFACE_PCC; >> + >> + if (msc->iface == MPAM_IFACE_MMIO) { >> + void __iomem *io; >> + >> + io = devm_platform_get_and_ioremap_resource(pdev, 0, >> + &msc_res); >> + if (IS_ERR(io)) { >> + pr_err("Failed to map MSC base address\n"); >> + err = PTR_ERR(io); >> + break; >> + } >> + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; >> + msc->mapped_hwpage = io; >> + } else if (msc->iface == MPAM_IFACE_PCC) { >> + msc->pcc_cl.dev = &pdev->dev; >> + msc->pcc_cl.rx_callback = mpam_pcc_rx_callback; >> + msc->pcc_cl.tx_block = false; >> + msc->pcc_cl.tx_tout = 1000; /* 1s */ >> + msc->pcc_cl.knows_txdone = false; >> + >> + msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl, >> + msc->pcc_subspace_id); >> + if (IS_ERR(msc->pcc_chan)) { >> + pr_err("Failed to request MSC PCC channel\n"); >> + err = PTR_ERR(msc->pcc_chan); >> + break; >> + } > I don't see pcc support added in this series. Should we fail the probe > if this interface is specified? I've got patches from Andre P to support it on DT - but the platforms that need it keeping popping in and out of existence. I'll pull these bits out - they were intended to check the ACPI table wasn't totally rotten... > (If keeping, there is a missing pcc_mbox_free_channel() on the error path.) When pcc_mbox_request_channel() fails? It already called mbox_free_channel() itself. >> + } >> + >> + list_add_rcu(&msc->glbl_list, &mpam_all_msc); >> + platform_set_drvdata(pdev, msc); >> + } while (0); >> + mutex_unlock(&mpam_list_lock); >> + >> + if (!err) { >> + /* Create RIS entries described by firmware */ >> + if (!acpi_disabled) >> + err = acpi_mpam_parse_resources(msc, plat_data); >> + else >> + err = mpam_dt_parse_resources(msc, plat_data); >> + } >> + >> + if (!err && fw_num_msc == mpam_num_msc) >> + mpam_discovery_complete(); >> + >> + if (err && msc) >> + mpam_msc_drv_remove(pdev); >> + >> + return err; >> +} >> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h >> new file mode 100644 >> index 000000000000..07e0f240eaca >> --- /dev/null >> +++ b/drivers/resctrl/mpam_internal.h >> @@ -0,0 +1,62 @@ >> +struct mpam_msc { >> + /* member of mpam_all_msc */ >> + struct list_head glbl_list; >> + >> + int id; >> + struct platform_device *pdev; >> + >> + /* Not modified after mpam_is_enabled() becomes true */ >> + enum mpam_msc_iface iface; >> + u32 pcc_subspace_id; >> + struct mbox_client pcc_cl; >> + struct pcc_mbox_chan *pcc_chan; >> + u32 nrdy_usec; >> + cpumask_t accessibility; >> + >> + /* >> + * probe_lock is only take during discovery. After discovery these > nit: s/take/taken/ Fixed, >> + * properties become read-only and the lists are protected by SRCU. >> + */ >> + struct mutex probe_lock; >> + unsigned long ris_idxs[128 / BITS_PER_LONG]; >> + u32 ris_max; >> + >> + /* mpam_msc_ris of this component */ >> + struct list_head ris; >> + >> + /* >> + * part_sel_lock protects access to the MSC hardware registers that are >> + * affected by MPAMCFG_PART_SEL. (including the ID registers that vary >> + * by RIS). >> + * If needed, take msc->lock first. >> + */ >> + struct mutex part_sel_lock; >> + >> + /* >> + * mon_sel_lock protects access to the MSC hardware registers that are >> + * affeted by MPAMCFG_MON_SEL. > nit: s/affeted/affected/ Fixed, >> + * If needed, take msc->lock first. >> + */ >> + struct mutex outer_mon_sel_lock; >> + raw_spinlock_t inner_mon_sel_lock; >> + unsigned long inner_mon_sel_flags; >> + >> + void __iomem *mapped_hwpage; >> + size_t mapped_hwpage_sz; >> +}; >> + >> +#endif /* MPAM_INTERNAL_H */ Thanks, James
Hi James, On 9/5/25 19:48, James Morse wrote: > Hi Ben, > > On 27/08/2025 14:03, Ben Horgan wrote: >> On 8/22/25 16:29, James Morse wrote: >>> Probing MPAM is convoluted. MSCs that are integrated with a CPU may >>> only be accessible from those CPUs, and they may not be online. >>> Touching the hardware early is pointless as MPAM can't be used until >>> the system-wide common values for num_partid and num_pmg have been >>> discovered. >>> >>> Start with driver probe/remove and mapping the MSC. > >>> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c >>> new file mode 100644 >>> index 000000000000..a0d9a699a6e7 >>> --- /dev/null >>> +++ b/drivers/resctrl/mpam_devices.c >>> @@ -0,0 +1,336 @@ > >>> +static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np, >>> + u32 ris_idx) >>> +{ >>> + int err = 0; >>> + u32 level = 0; >>> + unsigned long cache_id; >>> + struct device_node *cache; >>> + >>> + do { >>> + if (of_device_is_compatible(np, "arm,mpam-cache")) { >>> + cache = of_parse_phandle(np, "arm,mpam-device", 0); >>> + if (!cache) { >>> + pr_err("Failed to read phandle\n"); >>> + break; >>> + } >> This looks like this allows "arm,mpam-cache" and "arm,mpam-device" to be >> used on an msc node when there are no ris children. This usage could be >> reasonable but doesn't match the schema in the previous patch. Should >> this usage be rejected or the schema extended? > > The DT/ACPI stuff is only going to describe the things that make sense at a high level, > e.g. the controls for the L3. There may be other controls for stuff that doesn't make > sense in the hardware - these get discovered, grouped as 'unknown' and left alone. > > Another angle on this is where there is an MSC that the OS will never make use of, but > needs to know about to find the system wide minimum value. (there is a comment about > this in the ACPI spec...) > > I don't think its a problem if the magic dt-binding machinery is overly restrictive, that > is about validating DTB files... I agree with your points. However, I was rather thinking that the code allows more ways to describe the same thing than the schema does. In that, you could write something like: msc@80000 { compatible = "foo,a-standalone-msc"; reg = <0x80000 0x1000>; ... msc@10000 { compatible = "arm,mpam-msc arm,mpam-cache"; arm,mpam-device = <&mem>; ... } } Although, now I've written this out, it doesn't seem sensible to worry about this. Using ris compatibles on an msc, as in my example, is clearly an error. > > [snip] >>> + } else if (msc->iface == MPAM_IFACE_PCC) { >>> + msc->pcc_cl.dev = &pdev->dev; >>> + msc->pcc_cl.rx_callback = mpam_pcc_rx_callback; >>> + msc->pcc_cl.tx_block = false; >>> + msc->pcc_cl.tx_tout = 1000; /* 1s */ >>> + msc->pcc_cl.knows_txdone = false; >>> + >>> + msc->pcc_chan = pcc_mbox_request_channel(&msc->pcc_cl, >>> + msc->pcc_subspace_id); >>> + if (IS_ERR(msc->pcc_chan)) { >>> + pr_err("Failed to request MSC PCC channel\n"); >>> + err = PTR_ERR(msc->pcc_chan); >>> + break; >>> + } >> I don't see pcc support added in this series. Should we fail the probe >> if this interface is specified? > > I've got patches from Andre P to support it on DT - but the platforms that need it keeping > popping in and out of existence. I'll pull these bits out - they were intended to check > the ACPI table wasn't totally rotten... > > >> (If keeping, there is a missing pcc_mbox_free_channel() on the error path.) > > When pcc_mbox_request_channel() fails? It already called mbox_free_channel() itself. Apologies, this was relating to if the *_parse_resources() call below failed. > > >>> + } >>> + >>> + list_add_rcu(&msc->glbl_list, &mpam_all_msc); >>> + platform_set_drvdata(pdev, msc); >>> + } while (0); >>> + mutex_unlock(&mpam_list_lock); >>> + >>> + if (!err) { >>> + /* Create RIS entries described by firmware */ >>> + if (!acpi_disabled) >>> + err = acpi_mpam_parse_resources(msc, plat_data); >>> + else >>> + err = mpam_dt_parse_resources(msc, plat_data); >>> + } >>> + >>> + if (!err && fw_num_msc == mpam_num_msc) >>> + mpam_discovery_complete(); >>> + >>> + if (err && msc) >>> + mpam_msc_drv_remove(pdev); >>> + >>> + return err; >>> +} [snip]> > Thanks, > > James Thanks, Ben
… … > +static int mpam_msc_drv_probe(struct platform_device *pdev) > +{ … > + } while (0); > + mutex_unlock(&mpam_list_lock); > + > + if (!err) { > + /* Create RIS entries described by firmware */ > + if (!acpi_disabled) > + err = acpi_mpam_parse_resources(msc, plat_data); > + else > + err = mpam_dt_parse_resources(msc, plat_data); > + } > + > + if (!err && fw_num_msc == mpam_num_msc) > + mpam_discovery_complete(); > + > + if (err && msc) > + mpam_msc_drv_remove(pdev); > + > + return err; > +} … * Would you like to integrate anything from the following source code variant? if (!err) /* Create RIS entries described by firmware */ err = acpi_disabled ? mpam_dt_parse_resources(msc, plat_data) : acpi_mpam_parse_resources(msc, plat_data); if (err) { if (msc) mpam_msc_drv_remove(pdev); } else { if (fw_num_msc == mpam_num_msc) mpam_discovery_complete(); } * How do you think about to increase the application of scope-based resource management at further places? Regards, Markus
On Fri, Aug 22, 2025 at 09:55:33PM +0200, Markus Elfring wrote: > … > … > > +static int mpam_msc_drv_probe(struct platform_device *pdev) > > +{ > … > > + } while (0); > > + mutex_unlock(&mpam_list_lock); > > + > > + if (!err) { > > + /* Create RIS entries described by firmware */ > > + if (!acpi_disabled) > > + err = acpi_mpam_parse_resources(msc, plat_data); > > + else > > + err = mpam_dt_parse_resources(msc, plat_data); > > + } > > + > > + if (!err && fw_num_msc == mpam_num_msc) > > + mpam_discovery_complete(); > > + > > + if (err && msc) > > + mpam_msc_drv_remove(pdev); > > + > > + return err; > > +} > … > > * Would you like to integrate anything from the following source code variant? > > if (!err) > /* Create RIS entries described by firmware */ > err = acpi_disabled > ? mpam_dt_parse_resources(msc, plat_data) > : acpi_mpam_parse_resources(msc, plat_data); > > if (err) { > if (msc) > mpam_msc_drv_remove(pdev); > } else { > if (fw_num_msc == mpam_num_msc) > mpam_discovery_complete(); > } > > * How do you think about to increase the application of scope-based resource management > at further places? > > > Regards, > Markus Hi, This is the semi-friendly patch-bot of Greg Kroah-Hartman. Markus, you seem to have sent a nonsensical or otherwise pointless review comment to a patch submission on a Linux kernel developer mailing list. I strongly suggest that you not do this anymore. Please do not bother developers who are actively working to produce patches and features with comments that, in the end, are a waste of time. Patch submitter, please ignore Markus's suggestion; you do not need to follow it at all. The person/bot/AI that sent it is being ignored by almost all Linux kernel maintainers for having a persistent pattern of behavior of producing distracting and pointless commentary, and inability to adapt to feedback. Please feel free to also ignore emails from them. thanks, greg k-h's patch email bot
… > +++ b/drivers/resctrl/mpam_devices.c > @@ -0,0 +1,336 @@ … > +static void mpam_msc_drv_remove(struct platform_device *pdev) > +{ … > + mutex_lock(&mpam_list_lock); > + mpam_num_msc--; … > + devm_kfree(&pdev->dev, msc); > + mutex_unlock(&mpam_list_lock); > +} … Under which circumstances would you become interested to apply a statement like “guard(mutex)(&mpam_list_lock);”? https://elixir.bootlin.com/linux/v6.17-rc2/source/include/linux/mutex.h#L228 Regards, Markus
© 2016 - 2025 Red Hat, Inc.