Add code to parse the arm64 specific MPAM table, looking up the cache
level from the PPTT and feeding the end result into the MPAM driver.
CC: Carl Worth <carl@os.amperecomputing.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
arch/arm64/Kconfig | 1 +
drivers/acpi/arm64/Kconfig | 3 +
drivers/acpi/arm64/Makefile | 1 +
drivers/acpi/arm64/mpam.c | 365 ++++++++++++++++++++++++++++++++++++
drivers/acpi/tables.c | 2 +-
include/linux/arm_mpam.h | 46 +++++
6 files changed, 417 insertions(+), 1 deletion(-)
create mode 100644 drivers/acpi/arm64/mpam.c
create mode 100644 include/linux/arm_mpam.h
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5f08214537d0..ad9a49a39e41 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2060,6 +2060,7 @@ config ARM64_TLB_RANGE
config ARM64_MPAM
bool "Enable support for MPAM"
+ select ACPI_MPAM if ACPI
help
Memory Partitioning and Monitoring is an optional extension
that allows the CPUs to mark load and store transactions with
diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
index b3ed6212244c..f2fd79f22e7d 100644
--- a/drivers/acpi/arm64/Kconfig
+++ b/drivers/acpi/arm64/Kconfig
@@ -21,3 +21,6 @@ config ACPI_AGDI
config ACPI_APMT
bool
+
+config ACPI_MPAM
+ bool
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index 05ecde9eaabe..27b872249baa 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -6,5 +6,6 @@ obj-$(CONFIG_ACPI_GTDT) += gtdt.o
obj-$(CONFIG_ACPI_IORT) += iort.o
obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o
obj-$(CONFIG_ARM_AMBA) += amba.o
+obj-$(CONFIG_ACPI_MPAM) += mpam.o
obj-y += dma.o init.o
obj-y += thermal_cpufreq.o
diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c
new file mode 100644
index 000000000000..f4791bac9a2a
--- /dev/null
+++ b/drivers/acpi/arm64/mpam.c
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2025 Arm Ltd.
+
+/* Parse the MPAM ACPI table feeding the discovered nodes into the driver */
+
+#define pr_fmt(fmt) "ACPI MPAM: " fmt
+
+#include <linux/acpi.h>
+#include <linux/arm_mpam.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/platform_device.h>
+
+#include <acpi/processor.h>
+
+/* Flags for acpi_table_mpam_msc.*_interrupt_flags */
+#define ACPI_MPAM_MSC_IRQ_MODE_EDGE 1
+#define ACPI_MPAM_MSC_IRQ_TYPE_MASK (3 << 1)
+#define ACPI_MPAM_MSC_IRQ_TYPE_WIRED 0
+#define ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER BIT(3)
+#define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID BIT(4)
+
+static bool frob_irq(struct platform_device *pdev, int intid, u32 flags,
+ int *irq, u32 processor_container_uid)
+{
+ int sense;
+
+ if (!intid)
+ return false;
+
+ /* 0 in this field indicates a wired interrupt */
+ if (flags & ACPI_MPAM_MSC_IRQ_TYPE_MASK)
+ return false;
+
+ if (flags & ACPI_MPAM_MSC_IRQ_MODE_EDGE)
+ sense = ACPI_EDGE_SENSITIVE;
+ else
+ sense = ACPI_LEVEL_SENSITIVE;
+
+ /*
+ * If the GSI is in the GIC's PPI range, try and create a partitioned
+ * percpu interrupt.
+ */
+ if (16 <= intid && intid < 32 && processor_container_uid != ~0) {
+ pr_err_once("Partitioned interrupts not supported\n");
+ return false;
+ }
+
+ *irq = acpi_register_gsi(&pdev->dev, intid, sense, ACPI_ACTIVE_HIGH);
+ if (*irq <= 0) {
+ pr_err_once("Failed to register interrupt 0x%x with ACPI\n",
+ intid);
+ return false;
+ }
+
+ return true;
+}
+
+static void acpi_mpam_parse_irqs(struct platform_device *pdev,
+ struct acpi_mpam_msc_node *tbl_msc,
+ struct resource *res, int *res_idx)
+{
+ u32 flags, aff = ~0;
+ int irq;
+
+ flags = tbl_msc->overflow_interrupt_flags;
+ if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID &&
+ flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER)
+ aff = tbl_msc->overflow_interrupt_affinity;
+ if (frob_irq(pdev, tbl_msc->overflow_interrupt, flags, &irq, aff)) {
+ res[*res_idx].start = irq;
+ res[*res_idx].end = irq;
+ res[*res_idx].flags = IORESOURCE_IRQ;
+ res[*res_idx].name = "overflow";
+
+ (*res_idx)++;
+ }
+
+ flags = tbl_msc->error_interrupt_flags;
+ if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID &&
+ flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER)
+ aff = tbl_msc->error_interrupt_affinity;
+ else
+ aff = ~0;
+ if (frob_irq(pdev, tbl_msc->error_interrupt, flags, &irq, aff)) {
+ res[*res_idx].start = irq;
+ res[*res_idx].end = irq;
+ res[*res_idx].flags = IORESOURCE_IRQ;
+ res[*res_idx].name = "error";
+
+ (*res_idx)++;
+ }
+}
+
+static int acpi_mpam_parse_resource(struct mpam_msc *msc,
+ struct acpi_mpam_resource_node *res)
+{
+ int level, nid;
+ u32 cache_id;
+
+ switch (res->locator_type) {
+ case ACPI_MPAM_LOCATION_TYPE_PROCESSOR_CACHE:
+ cache_id = res->locator.cache_locator.cache_reference;
+ level = find_acpi_cache_level_from_id(cache_id);
+ if (level < 0) {
+ pr_err_once("Bad level (%u) for cache with id %u\n", level, cache_id);
+ return -EINVAL;
+ }
+ return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_CACHE,
+ level, cache_id);
+ case ACPI_MPAM_LOCATION_TYPE_MEMORY:
+ nid = pxm_to_node(res->locator.memory_locator.proximity_domain);
+ if (nid == NUMA_NO_NODE)
+ nid = 0;
+ return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_MEMORY,
+ 255, nid);
+ default:
+ /* These get discovered later and treated as unknown */
+ return 0;
+ }
+}
+
+int acpi_mpam_parse_resources(struct mpam_msc *msc,
+ struct acpi_mpam_msc_node *tbl_msc)
+{
+ int i, err;
+ struct acpi_mpam_resource_node *resources;
+
+ resources = (struct acpi_mpam_resource_node *)(tbl_msc + 1);
+ for (i = 0; i < tbl_msc->num_resource_nodes; i++) {
+ err = acpi_mpam_parse_resource(msc, &resources[i]);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static bool __init parse_msc_pm_link(struct acpi_mpam_msc_node *tbl_msc,
+ struct platform_device *pdev,
+ u32 *acpi_id)
+{
+ bool acpi_id_valid = false;
+ struct acpi_device *buddy;
+ char hid[16], uid[16];
+ int err;
+
+ memset(&hid, 0, sizeof(hid));
+ memcpy(hid, &tbl_msc->hardware_id_linked_device,
+ sizeof(tbl_msc->hardware_id_linked_device));
+
+ if (!strcmp(hid, ACPI_PROCESSOR_CONTAINER_HID)) {
+ *acpi_id = tbl_msc->instance_id_linked_device;
+ acpi_id_valid = true;
+ }
+
+ err = snprintf(uid, sizeof(uid), "%u",
+ tbl_msc->instance_id_linked_device);
+ if (err < 0 || err >= sizeof(uid))
+ return acpi_id_valid;
+
+ buddy = acpi_dev_get_first_match_dev(hid, uid, -1);
+ if (buddy)
+ device_link_add(&pdev->dev, &buddy->dev, DL_FLAG_STATELESS);
+
+ return acpi_id_valid;
+}
+
+static int decode_interface_type(struct acpi_mpam_msc_node *tbl_msc,
+ enum mpam_msc_iface *iface)
+{
+ switch (tbl_msc->interface_type) {
+ case 0:
+ *iface = MPAM_IFACE_MMIO;
+ return 0;
+ case 0xa:
+ *iface = MPAM_IFACE_PCC;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int __init _parse_table(struct acpi_table_header *table)
+{
+ char *table_end, *table_offset = (char *)(table + 1);
+ struct property_entry props[4]; /* needs a sentinel */
+ struct acpi_mpam_msc_node *tbl_msc;
+ int next_res, next_prop, err = 0;
+ struct acpi_device *companion;
+ struct platform_device *pdev;
+ enum mpam_msc_iface iface;
+ struct resource res[3];
+ char uid[16];
+ u32 acpi_id;
+
+ table_end = (char *)table + table->length;
+
+ while (table_offset < table_end) {
+ tbl_msc = (struct acpi_mpam_msc_node *)table_offset;
+ table_offset += tbl_msc->length;
+
+ /*
+ * If any of the reserved fields are set, make no attempt to
+ * parse the msc structure. This will prevent the driver from
+ * probing all the MSC, meaning it can't discover the system
+ * wide supported partid and pmg ranges. This avoids whatever
+ * this MSC is truncating the partids and creating a screaming
+ * error interrupt.
+ */
+ if (tbl_msc->reserved || tbl_msc->reserved1 || tbl_msc->reserved2)
+ continue;
+
+ if (decode_interface_type(tbl_msc, &iface))
+ continue;
+
+ next_res = 0;
+ next_prop = 0;
+ memset(res, 0, sizeof(res));
+ memset(props, 0, sizeof(props));
+
+ pdev = platform_device_alloc("mpam_msc", tbl_msc->identifier);
+ if (IS_ERR(pdev)) {
+ err = PTR_ERR(pdev);
+ break;
+ }
+
+ if (tbl_msc->length < sizeof(*tbl_msc)) {
+ err = -EINVAL;
+ break;
+ }
+
+ /* Some power management is described in the namespace: */
+ err = snprintf(uid, sizeof(uid), "%u", tbl_msc->identifier);
+ if (err > 0 && err < sizeof(uid)) {
+ companion = acpi_dev_get_first_match_dev("ARMHAA5C", uid, -1);
+ if (companion)
+ ACPI_COMPANION_SET(&pdev->dev, companion);
+ }
+
+ if (iface == MPAM_IFACE_MMIO) {
+ res[next_res].name = "MPAM:MSC";
+ res[next_res].start = tbl_msc->base_address;
+ res[next_res].end = tbl_msc->base_address + tbl_msc->mmio_size - 1;
+ res[next_res].flags = IORESOURCE_MEM;
+ next_res++;
+ } else if (iface == MPAM_IFACE_PCC) {
+ props[next_prop++] = PROPERTY_ENTRY_U32("pcc-channel",
+ tbl_msc->base_address);
+ next_prop++;
+ }
+
+ acpi_mpam_parse_irqs(pdev, tbl_msc, res, &next_res);
+ err = platform_device_add_resources(pdev, res, next_res);
+ if (err)
+ break;
+
+ props[next_prop++] = PROPERTY_ENTRY_U32("arm,not-ready-us",
+ tbl_msc->max_nrdy_usec);
+
+ /*
+ * The MSC's CPU affinity is described via its linked power
+ * management device, but only if it points at a Processor or
+ * Processor Container.
+ */
+ if (parse_msc_pm_link(tbl_msc, pdev, &acpi_id)) {
+ props[next_prop++] = PROPERTY_ENTRY_U32("cpu_affinity",
+ acpi_id);
+ }
+
+ err = device_create_managed_software_node(&pdev->dev, props,
+ NULL);
+ if (err)
+ break;
+
+ /* Come back later if you want the RIS too */
+ err = platform_device_add_data(pdev, tbl_msc, tbl_msc->length);
+ if (err)
+ break;
+
+ platform_device_add(pdev);
+ }
+
+ if (err)
+ platform_device_put(pdev);
+
+ return err;
+}
+
+static struct acpi_table_header *get_table(void)
+{
+ struct acpi_table_header *table;
+ acpi_status status;
+
+ if (acpi_disabled || !system_supports_mpam())
+ return NULL;
+
+ status = acpi_get_table(ACPI_SIG_MPAM, 0, &table);
+ if (ACPI_FAILURE(status))
+ return NULL;
+
+ if (table->revision != 1)
+ return NULL;
+
+ return table;
+}
+
+static int __init acpi_mpam_parse(void)
+{
+ struct acpi_table_header *mpam;
+ int err;
+
+ mpam = get_table();
+ if (!mpam)
+ return 0;
+
+ err = _parse_table(mpam);
+ acpi_put_table(mpam);
+
+ return err;
+}
+
+static int _count_msc(struct acpi_table_header *table)
+{
+ char *table_end, *table_offset = (char *)(table + 1);
+ struct acpi_mpam_msc_node *tbl_msc;
+ int ret = 0;
+
+ tbl_msc = (struct acpi_mpam_msc_node *)table_offset;
+ table_end = (char *)table + table->length;
+
+ while (table_offset < table_end) {
+ if (tbl_msc->length < sizeof(*tbl_msc))
+ return -EINVAL;
+
+ ret++;
+
+ table_offset += tbl_msc->length;
+ tbl_msc = (struct acpi_mpam_msc_node *)table_offset;
+ }
+
+ return ret;
+}
+
+int acpi_mpam_count_msc(void)
+{
+ struct acpi_table_header *mpam;
+ int ret;
+
+ mpam = get_table();
+ if (!mpam)
+ return 0;
+
+ ret = _count_msc(mpam);
+ acpi_put_table(mpam);
+
+ return ret;
+}
+
+/*
+ * Call after ACPI devices have been created, which happens behind acpi_scan_init()
+ * called from subsys_initcall(). PCC requires the mailbox driver, which is
+ * initialised from postcore_initcall().
+ */
+subsys_initcall_sync(acpi_mpam_parse);
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index fa9bb8c8ce95..835e3795ede3 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -408,7 +408,7 @@ static const char table_sigs[][ACPI_NAMESEG_SIZE] __nonstring_array __initconst
ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT,
ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT,
ACPI_SIG_NHLT, ACPI_SIG_AEST, ACPI_SIG_CEDT, ACPI_SIG_AGDI,
- ACPI_SIG_NBFT };
+ ACPI_SIG_NBFT, ACPI_SIG_MPAM };
#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h
new file mode 100644
index 000000000000..0edefa6ba019
--- /dev/null
+++ b/include/linux/arm_mpam.h
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Arm Ltd. */
+
+#ifndef __LINUX_ARM_MPAM_H
+#define __LINUX_ARM_MPAM_H
+
+#include <linux/acpi.h>
+#include <linux/types.h>
+
+struct mpam_msc;
+
+enum mpam_msc_iface {
+ MPAM_IFACE_MMIO, /* a real MPAM MSC */
+ MPAM_IFACE_PCC, /* a fake MPAM MSC */
+};
+
+enum mpam_class_types {
+ MPAM_CLASS_CACHE, /* Well known caches, e.g. L2 */
+ MPAM_CLASS_MEMORY, /* Main memory */
+ MPAM_CLASS_UNKNOWN, /* Everything else, e.g. SMMU */
+};
+
+#ifdef CONFIG_ACPI_MPAM
+/* Parse the ACPI description of resources entries for this MSC. */
+int acpi_mpam_parse_resources(struct mpam_msc *msc,
+ struct acpi_mpam_msc_node *tbl_msc);
+
+int acpi_mpam_count_msc(void);
+#else
+static inline int acpi_mpam_parse_resources(struct mpam_msc *msc,
+ struct acpi_mpam_msc_node *tbl_msc)
+{
+ return -EINVAL;
+}
+
+static inline int acpi_mpam_count_msc(void) { return -EINVAL; }
+#endif
+
+static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
+ enum mpam_class_types type, u8 class_id,
+ int component_id)
+{
+ return -EINVAL;
+}
+
+#endif /* __LINUX_ARM_MPAM_H */
--
2.39.5
Hi James, On 11/07/2025 19:36, James Morse wrote: > Add code to parse the arm64 specific MPAM table, looking up the cache > level from the PPTT and feeding the end result into the MPAM driver. > > CC: Carl Worth <carl@os.amperecomputing.com> > Signed-off-by: James Morse <james.morse@arm.com> > --- > arch/arm64/Kconfig | 1 + > drivers/acpi/arm64/Kconfig | 3 + > drivers/acpi/arm64/Makefile | 1 + > drivers/acpi/arm64/mpam.c | 365 ++++++++++++++++++++++++++++++++++++ > drivers/acpi/tables.c | 2 +- > include/linux/arm_mpam.h | 46 +++++ > 6 files changed, 417 insertions(+), 1 deletion(-) > create mode 100644 drivers/acpi/arm64/mpam.c > create mode 100644 include/linux/arm_mpam.h [snip] > diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c > new file mode 100644 > index 000000000000..f4791bac9a2a > --- /dev/null > +++ b/drivers/acpi/arm64/mpam.c > @@ -0,0 +1,365 @@ > +// SPDX-License-Identifier: GPL-2.0 > +// Copyright (C) 2025 Arm Ltd. > + > +/* Parse the MPAM ACPI table feeding the discovered nodes into the driver */ > + > +#define pr_fmt(fmt) "ACPI MPAM: " fmt > + > +#include <linux/acpi.h> > +#include <linux/arm_mpam.h> > +#include <linux/cpu.h> > +#include <linux/cpumask.h> > +#include <linux/platform_device.h> > + > +#include <acpi/processor.h> > + > +/* Flags for acpi_table_mpam_msc.*_interrupt_flags */ > +#define ACPI_MPAM_MSC_IRQ_MODE_EDGE 1 > +#define ACPI_MPAM_MSC_IRQ_TYPE_MASK (3 << 1) > +#define ACPI_MPAM_MSC_IRQ_TYPE_WIRED 0 > +#define ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER BIT(3) > +#define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID BIT(4) > + > +static bool frob_irq(struct platform_device *pdev, int intid, u32 flags, > + int *irq, u32 processor_container_uid) > +{ > + int sense; > + > + if (!intid) > + return false; > + > + /* 0 in this field indicates a wired interrupt */ > + if (flags & ACPI_MPAM_MSC_IRQ_TYPE_MASK) > + return false; > + > + if (flags & ACPI_MPAM_MSC_IRQ_MODE_EDGE) > + sense = ACPI_EDGE_SENSITIVE; > + else > + sense = ACPI_LEVEL_SENSITIVE; > + > + /* > + * If the GSI is in the GIC's PPI range, try and create a partitioned > + * percpu interrupt. > + */ > + if (16 <= intid && intid < 32 && processor_container_uid != ~0) { > + pr_err_once("Partitioned interrupts not supported\n"); > + return false; > + } > + > + *irq = acpi_register_gsi(&pdev->dev, intid, sense, ACPI_ACTIVE_HIGH); > + if (*irq <= 0) { > + pr_err_once("Failed to register interrupt 0x%x with ACPI\n", > + intid); > + return false; > + } > + > + return true; > +} > + > +static void acpi_mpam_parse_irqs(struct platform_device *pdev, > + struct acpi_mpam_msc_node *tbl_msc, > + struct resource *res, int *res_idx) > +{ > + u32 flags, aff = ~0; > + int irq; > + > + flags = tbl_msc->overflow_interrupt_flags; > + if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID && > + flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER) > + aff = tbl_msc->overflow_interrupt_affinity; > + if (frob_irq(pdev, tbl_msc->overflow_interrupt, flags, &irq, aff)) { > + res[*res_idx].start = irq; > + res[*res_idx].end = irq; > + res[*res_idx].flags = IORESOURCE_IRQ; > + res[*res_idx].name = "overflow"; > + > + (*res_idx)++; > + } > + > + flags = tbl_msc->error_interrupt_flags; > + if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID && > + flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER) > + aff = tbl_msc->error_interrupt_affinity; > + else > + aff = ~0; > + if (frob_irq(pdev, tbl_msc->error_interrupt, flags, &irq, aff)) { > + res[*res_idx].start = irq; > + res[*res_idx].end = irq; > + res[*res_idx].flags = IORESOURCE_IRQ; > + res[*res_idx].name = "error"; > + > + (*res_idx)++; > + } > +} > + > +static int acpi_mpam_parse_resource(struct mpam_msc *msc, > + struct acpi_mpam_resource_node *res) > +{ > + int level, nid; > + u32 cache_id; > + > + switch (res->locator_type) { > + case ACPI_MPAM_LOCATION_TYPE_PROCESSOR_CACHE: > + cache_id = res->locator.cache_locator.cache_reference; > + level = find_acpi_cache_level_from_id(cache_id); > + if (level < 0) { > + pr_err_once("Bad level (%u) for cache with id %u\n", level, cache_id); > + return -EINVAL; Nit: More robust to check for level <= 0. Thanks, Ben
Hi Ben, On 24/07/2025 11:50, Ben Horgan wrote: > On 11/07/2025 19:36, James Morse wrote: >> Add code to parse the arm64 specific MPAM table, looking up the cache >> level from the PPTT and feeding the end result into the MPAM driver. >> diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c >> new file mode 100644 >> index 000000000000..f4791bac9a2a >> --- /dev/null >> +++ b/drivers/acpi/arm64/mpam.c >> @@ -0,0 +1,365 @@ >> +static int acpi_mpam_parse_resource(struct mpam_msc *msc, >> + struct acpi_mpam_resource_node *res) >> +{ >> + int level, nid; >> + u32 cache_id; >> + >> + switch (res->locator_type) { >> + case ACPI_MPAM_LOCATION_TYPE_PROCESSOR_CACHE: >> + cache_id = res->locator.cache_locator.cache_reference; >> + level = find_acpi_cache_level_from_id(cache_id); >> + if (level < 0) { >> + pr_err_once("Bad level (%u) for cache with id %u\n", level, cache_id); >> + return -EINVAL; > Nit: More robust to check for level <= 0. Sure, that can probably happen! Thanks, James
On Fri, 11 Jul 2025 18:36:22 +0000 James Morse <james.morse@arm.com> wrote: > Add code to parse the arm64 specific MPAM table, looking up the cache > level from the PPTT and feeding the end result into the MPAM driver. Throw in a link to the spec perhaps? Particularly useful to know which version this was written against when reviewing it. > > CC: Carl Worth <carl@os.amperecomputing.com> > Signed-off-by: James Morse <james.morse@arm.com> > --- > arch/arm64/Kconfig | 1 + > drivers/acpi/arm64/Kconfig | 3 + > drivers/acpi/arm64/Makefile | 1 + > drivers/acpi/arm64/mpam.c | 365 ++++++++++++++++++++++++++++++++++++ > drivers/acpi/tables.c | 2 +- > include/linux/arm_mpam.h | 46 +++++ > 6 files changed, 417 insertions(+), 1 deletion(-) > create mode 100644 drivers/acpi/arm64/mpam.c > create mode 100644 include/linux/arm_mpam.h > > diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile > index 05ecde9eaabe..27b872249baa 100644 > --- a/drivers/acpi/arm64/Makefile > +++ b/drivers/acpi/arm64/Makefile > @@ -6,5 +6,6 @@ obj-$(CONFIG_ACPI_GTDT) += gtdt.o > obj-$(CONFIG_ACPI_IORT) += iort.o > obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o > obj-$(CONFIG_ARM_AMBA) += amba.o > +obj-$(CONFIG_ACPI_MPAM) += mpam.o Keep it with the ACPI ones? There doesn't seem to be a lot of order in here though so I guess maybe there is logic behind putting it here I'm missing. > obj-y += dma.o init.o > obj-y += thermal_cpufreq.o > diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c > new file mode 100644 > index 000000000000..f4791bac9a2a > --- /dev/null > +++ b/drivers/acpi/arm64/mpam.c > @@ -0,0 +1,365 @@ > +// SPDX-License-Identifier: GPL-2.0 > +// Copyright (C) 2025 Arm Ltd. > + > +/* Parse the MPAM ACPI table feeding the discovered nodes into the driver */ > + > +#define pr_fmt(fmt) "ACPI MPAM: " fmt > + > +#include <linux/acpi.h> > +#include <linux/arm_mpam.h> > +#include <linux/cpu.h> > +#include <linux/cpumask.h> > +#include <linux/platform_device.h> > + > +#include <acpi/processor.h> > + > +/* Flags for acpi_table_mpam_msc.*_interrupt_flags */ References.. I'm looking at 3.0-alpha table 5 to check this. I can see why you might be reluctant to point at an alpha if that is what you are using ;) > +#define ACPI_MPAM_MSC_IRQ_MODE_EDGE 1 > +#define ACPI_MPAM_MSC_IRQ_TYPE_MASK (3 << 1) GENMASK(3, 2) would be my preference for how to do masks in new code. > +#define ACPI_MPAM_MSC_IRQ_TYPE_WIRED 0 > +#define ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER BIT(3) > +#define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID BIT(4) > + > +static bool frob_irq(struct platform_device *pdev, int intid, u32 flags, > + int *irq, u32 processor_container_uid) > +{ > + int sense; > + > + if (!intid) > + return false; > + > + /* 0 in this field indicates a wired interrupt */ > + if (flags & ACPI_MPAM_MSC_IRQ_TYPE_MASK) I'd prefer more explicit code (and probably no comment) if (FIELD_GET(flags, ACPI_MPAM_MSC_IRQ_TYPE_MASK) != ACPI_MPAM_MSC_IRQ_TYPE_WIRED) return false; > + return false; > + > + if (flags & ACPI_MPAM_MSC_IRQ_MODE_EDGE) > + sense = ACPI_EDGE_SENSITIVE; > + else > + sense = ACPI_LEVEL_SENSITIVE; If the spec is supposed to be using standard ACPI_* types for this field (I don't think the connection is explicitly documented though) then sense = FIELD_GET(flags, ACPI_MPAM_MSC_IRQ_MODE_MASK); Assuming a change to define the mask and rely on the ACPI defs for the values This one is entirely up to you. > + > + /* > + * If the GSI is in the GIC's PPI range, try and create a partitioned > + * percpu interrupt. > + */ > + if (16 <= intid && intid < 32 && processor_container_uid != ~0) { > + pr_err_once("Partitioned interrupts not supported\n"); > + return false; > + } > + > + *irq = acpi_register_gsi(&pdev->dev, intid, sense, ACPI_ACTIVE_HIGH); > + if (*irq <= 0) { > + pr_err_once("Failed to register interrupt 0x%x with ACPI\n", > + intid); > + return false; > + } > + > + return true; > +} > + > +static void acpi_mpam_parse_irqs(struct platform_device *pdev, > + struct acpi_mpam_msc_node *tbl_msc, > + struct resource *res, int *res_idx) > +{ > + u32 flags, aff = ~0; > + int irq; > + > + flags = tbl_msc->overflow_interrupt_flags; > + if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID && > + flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER) > + aff = tbl_msc->overflow_interrupt_affinity; Just to make the two cases look the same I'd do else aff = ~0; here as well and not initialize above. It's not quite worth using a helper function for these two identical blocks but it's close. > + if (frob_irq(pdev, tbl_msc->overflow_interrupt, flags, &irq, aff)) { > + res[*res_idx].start = irq; > + res[*res_idx].end = irq; > + res[*res_idx].flags = IORESOURCE_IRQ; > + res[*res_idx].name = "overflow"; res[*res_idx] = DEFINE_RES_IRQ_NAMED(irq, 1, "overflow"); > + > + (*res_idx)++; Can roll this in as well. > + } > + > + flags = tbl_msc->error_interrupt_flags; > + if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID && > + flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER) > + aff = tbl_msc->error_interrupt_affinity; > + else > + aff = ~0; > + if (frob_irq(pdev, tbl_msc->error_interrupt, flags, &irq, aff)) { > + res[*res_idx].start = irq; > + res[*res_idx].end = irq; > + res[*res_idx].flags = IORESOURCE_IRQ; > + res[*res_idx].name = "error"; Similar to above. > + > + (*res_idx)++; > + } > +} > + > +static bool __init parse_msc_pm_link(struct acpi_mpam_msc_node *tbl_msc, > + struct platform_device *pdev, > + u32 *acpi_id) > +{ > + bool acpi_id_valid = false; > + struct acpi_device *buddy; > + char hid[16], uid[16]; > + int err; > + > + memset(&hid, 0, sizeof(hid)); > + memcpy(hid, &tbl_msc->hardware_id_linked_device, > + sizeof(tbl_msc->hardware_id_linked_device)); > + > + if (!strcmp(hid, ACPI_PROCESSOR_CONTAINER_HID)) { > + *acpi_id = tbl_msc->instance_id_linked_device; > + acpi_id_valid = true; > + } > + > + err = snprintf(uid, sizeof(uid), "%u", > + tbl_msc->instance_id_linked_device); > + if (err < 0 || err >= sizeof(uid)) Does snprintf() ever return < 0 ? It's documented as returning number of chars printed (without the NULL) so that can only be 0 or greater. Can it return >= sizeof(uid) ? Looks odd. + return acpi_id_valid; > + > + buddy = acpi_dev_get_first_match_dev(hid, uid, -1); > + if (buddy) > + device_link_add(&pdev->dev, &buddy->dev, DL_FLAG_STATELESS); > + > + return acpi_id_valid; > +} > +static int __init _parse_table(struct acpi_table_header *table) > +{ > + char *table_end, *table_offset = (char *)(table + 1); > + struct property_entry props[4]; /* needs a sentinel */ > + struct acpi_mpam_msc_node *tbl_msc; > + int next_res, next_prop, err = 0; > + struct acpi_device *companion; > + struct platform_device *pdev; > + enum mpam_msc_iface iface; > + struct resource res[3]; > + char uid[16]; > + u32 acpi_id; > + > + table_end = (char *)table + table->length; > + > + while (table_offset < table_end) { > + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; > + table_offset += tbl_msc->length; > + > + /* > + * If any of the reserved fields are set, make no attempt to > + * parse the msc structure. This will prevent the driver from > + * probing all the MSC, meaning it can't discover the system > + * wide supported partid and pmg ranges. This avoids whatever > + * this MSC is truncating the partids and creating a screaming > + * error interrupt. > + */ > + if (tbl_msc->reserved || tbl_msc->reserved1 || tbl_msc->reserved2) > + continue; > + > + if (decode_interface_type(tbl_msc, &iface)) > + continue; > + > + next_res = 0; > + next_prop = 0; > + memset(res, 0, sizeof(res)); > + memset(props, 0, sizeof(props)); > + > + pdev = platform_device_alloc("mpam_msc", tbl_msc->identifier); > + if (IS_ERR(pdev)) { returns NULL in at least some error cases (probably all, I'm just to lazy to check) > + err = PTR_ERR(pdev); > + break; > + } > + > + if (tbl_msc->length < sizeof(*tbl_msc)) { > + err = -EINVAL; > + break; > + } > + > + /* Some power management is described in the namespace: */ > + err = snprintf(uid, sizeof(uid), "%u", tbl_msc->identifier); > + if (err > 0 && err < sizeof(uid)) { > + companion = acpi_dev_get_first_match_dev("ARMHAA5C", uid, -1); > + if (companion) > + ACPI_COMPANION_SET(&pdev->dev, companion); > + } > + > + if (iface == MPAM_IFACE_MMIO) { > + res[next_res].name = "MPAM:MSC"; > + res[next_res].start = tbl_msc->base_address; > + res[next_res].end = tbl_msc->base_address + tbl_msc->mmio_size - 1; > + res[next_res].flags = IORESOURCE_MEM; > + next_res++; DEFINE_RES_MEM_NAMED()? > + } else if (iface == MPAM_IFACE_PCC) { > + props[next_prop++] = PROPERTY_ENTRY_U32("pcc-channel", > + tbl_msc->base_address); > + next_prop++; > + } > + > + acpi_mpam_parse_irqs(pdev, tbl_msc, res, &next_res); > + err = platform_device_add_resources(pdev, res, next_res); > + if (err) > + break; > + > + props[next_prop++] = PROPERTY_ENTRY_U32("arm,not-ready-us", > + tbl_msc->max_nrdy_usec); > + > + /* > + * The MSC's CPU affinity is described via its linked power > + * management device, but only if it points at a Processor or > + * Processor Container. > + */ > + if (parse_msc_pm_link(tbl_msc, pdev, &acpi_id)) { > + props[next_prop++] = PROPERTY_ENTRY_U32("cpu_affinity", > + acpi_id); > + } > + > + err = device_create_managed_software_node(&pdev->dev, props, > + NULL); > + if (err) > + break; > + > + /* Come back later if you want the RIS too */ > + err = platform_device_add_data(pdev, tbl_msc, tbl_msc->length); > + if (err) > + break; > + > + platform_device_add(pdev); Can fail. > + } > + > + if (err) > + platform_device_put(pdev); > + > + return err; > +} > + > +static struct acpi_table_header *get_table(void) > +{ > + struct acpi_table_header *table; > + acpi_status status; > + > + if (acpi_disabled || !system_supports_mpam()) > + return NULL; > + > + status = acpi_get_table(ACPI_SIG_MPAM, 0, &table); > + if (ACPI_FAILURE(status)) > + return NULL; > + > + if (table->revision != 1) > + return NULL; > + > + return table; > +} > + > +static int __init acpi_mpam_parse(void) > +{ > + struct acpi_table_header *mpam; > + int err; > + > + mpam = get_table(); > + if (!mpam) > + return 0; Just what I was suggesting for the PPTT case in early patches. Nice :) > + > + err = _parse_table(mpam); > + acpi_put_table(mpam); > + > + return err; > +} > + > +static int _count_msc(struct acpi_table_header *table) > +{ > + char *table_end, *table_offset = (char *)(table + 1); > + struct acpi_mpam_msc_node *tbl_msc; > + int ret = 0; Call it count as it only ever contains the count? > + > + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; > + table_end = (char *)table + table->length; > + > + while (table_offset < table_end) { > + if (tbl_msc->length < sizeof(*tbl_msc)) > + return -EINVAL; > + > + ret++; count++ would feel more natural here. > + > + table_offset += tbl_msc->length; > + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; > + } > + > + return ret; > +} That's all I have time for today. Will get to the rest of the series soonish. Jonathan
Hi Jonathan, On 16/07/2025 18:07, Jonathan Cameron wrote: > On Fri, 11 Jul 2025 18:36:22 +0000 > James Morse <james.morse@arm.com> wrote: > >> Add code to parse the arm64 specific MPAM table, looking up the cache >> level from the PPTT and feeding the end result into the MPAM driver. > Throw in a link to the spec perhaps? Particularly useful to know which > version this was written against when reviewing it. Will do. Ben has already pointed out it wasn't written against the latest version... >> diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile >> index 05ecde9eaabe..27b872249baa 100644 >> --- a/drivers/acpi/arm64/Makefile >> +++ b/drivers/acpi/arm64/Makefile >> @@ -6,5 +6,6 @@ obj-$(CONFIG_ACPI_GTDT) += gtdt.o >> obj-$(CONFIG_ACPI_IORT) += iort.o >> obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o >> obj-$(CONFIG_ARM_AMBA) += amba.o >> +obj-$(CONFIG_ACPI_MPAM) += mpam.o > > Keep it with the ACPI ones? Sure, > There doesn't seem to be a lot of order in here > though so I guess maybe there is logic behind putting it here I'm missing. merge conflicts over many years always put it at the bottom of the file. I at least kept the conditional ones together. Moving it up lets the table 'drivers' appear together in alphabetical order. >> diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c >> new file mode 100644 >> index 000000000000..f4791bac9a2a >> --- /dev/null >> +++ b/drivers/acpi/arm64/mpam.c >> @@ -0,0 +1,365 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +// Copyright (C) 2025 Arm Ltd. >> + >> +/* Parse the MPAM ACPI table feeding the discovered nodes into the driver */ >> + >> +#define pr_fmt(fmt) "ACPI MPAM: " fmt >> + >> +#include <linux/acpi.h> >> +#include <linux/arm_mpam.h> >> +#include <linux/cpu.h> >> +#include <linux/cpumask.h> >> +#include <linux/platform_device.h> >> + >> +#include <acpi/processor.h> >> + >> +/* Flags for acpi_table_mpam_msc.*_interrupt_flags */ > References.. I'm looking at 3.0-alpha table 5 to check this. > I can see why you might be reluctant to point at an alpha if that > is what you are using ;) I did this against the released(?) version 2.0. (aka table revision 1). I'll add references based on the v3 beta ... it looks like that defines the mmio-size=0 behaviour and the pcc stuff. The mmio-size is harmless - we'd need to handle that as an error anyay. I don't want to touch the pcc thing until there is a real platform that needs it, and the spec is finished... e.g. | * See 2.1.1 Interrupt Flags, Table 5, of DEN0065B_MPAM_ACPI_3.0-bet. >> +#define ACPI_MPAM_MSC_IRQ_MODE_EDGE 1 >> +#define ACPI_MPAM_MSC_IRQ_TYPE_MASK (3 << 1) > GENMASK(3, 2) would be my preference for how to do masks in new code. GENMASK(2, 1), but yes. >> +#define ACPI_MPAM_MSC_IRQ_TYPE_WIRED 0 >> +#define ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER BIT(3) >> +#define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID BIT(4) >> + >> +static bool frob_irq(struct platform_device *pdev, int intid, u32 flags, >> + int *irq, u32 processor_container_uid) >> +{ >> + int sense; >> + >> + if (!intid) >> + return false; >> + >> + /* 0 in this field indicates a wired interrupt */ >> + if (flags & ACPI_MPAM_MSC_IRQ_TYPE_MASK) > I'd prefer more explicit code (and probably no comment) > > if (FIELD_GET(flags, ACPI_MPAM_MSC_IRQ_TYPE_MASK) != > ACPI_MPAM_MSC_IRQ_TYPE_WIRED) > return false; Sure, >> + return false; >> + >> + if (flags & ACPI_MPAM_MSC_IRQ_MODE_EDGE) >> + sense = ACPI_EDGE_SENSITIVE; >> + else >> + sense = ACPI_LEVEL_SENSITIVE; > > If the spec is supposed to be using standard ACPI_* types for this field > (I don't think the connection is explicitly documented though) then Sent as feedback on the spec. (I didn't realise those were standard!) > sense = FIELD_GET(flags, ACPI_MPAM_MSC_IRQ_MODE_MASK); > Assuming a change to define the mask and rely on the ACPI defs for the values > > This one is entirely up to you. >> + >> + /* >> + * If the GSI is in the GIC's PPI range, try and create a partitioned >> + * percpu interrupt. >> + */ >> + if (16 <= intid && intid < 32 && processor_container_uid != ~0) { >> + pr_err_once("Partitioned interrupts not supported\n"); >> + return false; >> + } >> + >> + *irq = acpi_register_gsi(&pdev->dev, intid, sense, ACPI_ACTIVE_HIGH); >> + if (*irq <= 0) { >> + pr_err_once("Failed to register interrupt 0x%x with ACPI\n", >> + intid); >> + return false; >> + } >> + >> + return true; >> +} >> + >> +static void acpi_mpam_parse_irqs(struct platform_device *pdev, >> + struct acpi_mpam_msc_node *tbl_msc, >> + struct resource *res, int *res_idx) >> +{ >> + u32 flags, aff = ~0; >> + int irq; >> + >> + flags = tbl_msc->overflow_interrupt_flags; >> + if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID && >> + flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER) >> + aff = tbl_msc->overflow_interrupt_affinity; > Just to make the two cases look the same I'd do > > else > aff = ~0; > > here as well and not initialize above. It's not quite worth using > a helper function for these two identical blocks but it's close. > >> + if (frob_irq(pdev, tbl_msc->overflow_interrupt, flags, &irq, aff)) { >> + res[*res_idx].start = irq; >> + res[*res_idx].end = irq; >> + res[*res_idx].flags = IORESOURCE_IRQ; >> + res[*res_idx].name = "overflow"; > > res[*res_idx] = DEFINE_RES_IRQ_NAMED(irq, 1, "overflow"); Handy, not seen that before. >> + >> + (*res_idx)++; > Can roll this in as well. >> + } >> + >> + flags = tbl_msc->error_interrupt_flags; >> + if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID && >> + flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER) >> + aff = tbl_msc->error_interrupt_affinity; >> + else >> + aff = ~0; >> + if (frob_irq(pdev, tbl_msc->error_interrupt, flags, &irq, aff)) { >> + res[*res_idx].start = irq; >> + res[*res_idx].end = irq; >> + res[*res_idx].flags = IORESOURCE_IRQ; >> + res[*res_idx].name = "error"; > > Similar to above. Yup, >> + >> + (*res_idx)++; >> + } >> +} >> + > > >> +static bool __init parse_msc_pm_link(struct acpi_mpam_msc_node *tbl_msc, >> + struct platform_device *pdev, >> + u32 *acpi_id) >> +{ >> + bool acpi_id_valid = false; >> + struct acpi_device *buddy; >> + char hid[16], uid[16]; >> + int err; >> + >> + memset(&hid, 0, sizeof(hid)); >> + memcpy(hid, &tbl_msc->hardware_id_linked_device, >> + sizeof(tbl_msc->hardware_id_linked_device)); >> + >> + if (!strcmp(hid, ACPI_PROCESSOR_CONTAINER_HID)) { >> + *acpi_id = tbl_msc->instance_id_linked_device; >> + acpi_id_valid = true; >> + } >> + >> + err = snprintf(uid, sizeof(uid), "%u", >> + tbl_msc->instance_id_linked_device); >> + if (err < 0 || err >= sizeof(uid)) > Does snprintf() ever return < 0 ? It's documented as returning > number of chars printed (without the NULL) so that can only be 0 or > greater. That looks like paranoia around string parsing in C, and snprintf() returning an int. I've removed the first half, > Can it return >= sizeof(uid) ? Looks odd. More paranoia, it should be impossible given the arguments, but the documentation has: | If the return is greater than or equal to @size, the resulting string is truncated. If the string is truncated, there is no reason to feed it into acpi_dev_get_first_match_dev(). > + return acpi_id_valid; >> + >> + buddy = acpi_dev_get_first_match_dev(hid, uid, -1); >> + if (buddy) >> + device_link_add(&pdev->dev, &buddy->dev, DL_FLAG_STATELESS); >> + >> + return acpi_id_valid; >> +} > >> +static int __init _parse_table(struct acpi_table_header *table) >> +{ >> + char *table_end, *table_offset = (char *)(table + 1); >> + struct property_entry props[4]; /* needs a sentinel */ >> + struct acpi_mpam_msc_node *tbl_msc; >> + int next_res, next_prop, err = 0; >> + struct acpi_device *companion; >> + struct platform_device *pdev; >> + enum mpam_msc_iface iface; >> + struct resource res[3]; >> + char uid[16]; >> + u32 acpi_id; >> + >> + table_end = (char *)table + table->length; >> + >> + while (table_offset < table_end) { >> + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; >> + table_offset += tbl_msc->length; >> + >> + /* >> + * If any of the reserved fields are set, make no attempt to >> + * parse the msc structure. This will prevent the driver from >> + * probing all the MSC, meaning it can't discover the system >> + * wide supported partid and pmg ranges. This avoids whatever >> + * this MSC is truncating the partids and creating a screaming >> + * error interrupt. >> + */ >> + if (tbl_msc->reserved || tbl_msc->reserved1 || tbl_msc->reserved2) >> + continue; >> + >> + if (decode_interface_type(tbl_msc, &iface)) >> + continue; >> + >> + next_res = 0; >> + next_prop = 0; >> + memset(res, 0, sizeof(res)); >> + memset(props, 0, sizeof(props)); >> + >> + pdev = platform_device_alloc("mpam_msc", tbl_msc->identifier); >> + if (IS_ERR(pdev)) { > returns NULL in at least some error cases (probably all, I'm just to lazy to check) So it does ... Fixed. >> + err = PTR_ERR(pdev); >> + break; >> + } >> + >> + if (tbl_msc->length < sizeof(*tbl_msc)) { >> + err = -EINVAL; >> + break; >> + } >> + >> + /* Some power management is described in the namespace: */ >> + err = snprintf(uid, sizeof(uid), "%u", tbl_msc->identifier); >> + if (err > 0 && err < sizeof(uid)) { >> + companion = acpi_dev_get_first_match_dev("ARMHAA5C", uid, -1); >> + if (companion) >> + ACPI_COMPANION_SET(&pdev->dev, companion); >> + } >> + >> + if (iface == MPAM_IFACE_MMIO) { >> + res[next_res].name = "MPAM:MSC"; >> + res[next_res].start = tbl_msc->base_address; >> + res[next_res].end = tbl_msc->base_address + tbl_msc->mmio_size - 1; >> + res[next_res].flags = IORESOURCE_MEM; >> + next_res++; > DEFINE_RES_MEM_NAMED()? Done, >> + } else if (iface == MPAM_IFACE_PCC) { >> + props[next_prop++] = PROPERTY_ENTRY_U32("pcc-channel", >> + tbl_msc->base_address); >> + next_prop++; >> + } >> + >> + acpi_mpam_parse_irqs(pdev, tbl_msc, res, &next_res); >> + err = platform_device_add_resources(pdev, res, next_res); >> + if (err) >> + break; >> + >> + props[next_prop++] = PROPERTY_ENTRY_U32("arm,not-ready-us", >> + tbl_msc->max_nrdy_usec); >> + >> + /* >> + * The MSC's CPU affinity is described via its linked power >> + * management device, but only if it points at a Processor or >> + * Processor Container. >> + */ >> + if (parse_msc_pm_link(tbl_msc, pdev, &acpi_id)) { >> + props[next_prop++] = PROPERTY_ENTRY_U32("cpu_affinity", >> + acpi_id); >> + } >> + >> + err = device_create_managed_software_node(&pdev->dev, props, >> + NULL); >> + if (err) >> + break; >> + >> + /* Come back later if you want the RIS too */ >> + err = platform_device_add_data(pdev, tbl_msc, tbl_msc->length); >> + if (err) >> + break; >> + >> + platform_device_add(pdev); > Can fail. Fixed, >> + } >> + >> + if (err) >> + platform_device_put(pdev); >> + >> + return err; >> +} >> +static int _count_msc(struct acpi_table_header *table) >> +{ >> + char *table_end, *table_offset = (char *)(table + 1); >> + struct acpi_mpam_msc_node *tbl_msc; >> + int ret = 0; > Call it count as it only ever contains the count? Sure, >> + >> + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; >> + table_end = (char *)table + table->length; >> + >> + while (table_offset < table_end) { >> + if (tbl_msc->length < sizeof(*tbl_msc)) >> + return -EINVAL; >> + >> + ret++; > > count++ would feel more natural here. > >> + >> + table_offset += tbl_msc->length; >> + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; >> + } >> + >> + return ret; >> +} > That's all I have time for today. Will get to the rest of the series soonish. Thanks for taking a look! James
> > +static struct acpi_table_header *get_table(void) > > +{ > > + struct acpi_table_header *table; > > + acpi_status status; > > + > > + if (acpi_disabled || !system_supports_mpam()) > > + return NULL; > > + > > + status = acpi_get_table(ACPI_SIG_MPAM, 0, &table); > > + if (ACPI_FAILURE(status)) > > + return NULL; > > + > > + if (table->revision != 1) Missing an acpi_put_table() I'm messing around with ACQUIRE() that is queued in the CXL tree for the coming merge window and noticed this. https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/log/?h=for-6.17/cleanup-acquire Interestingly this is a new corner case where we want conditional locking style handling but with return_ptr() style handling. Maybe too much of a niche to bother with infrastructure. Worth noting though that one layer up it is probably worth something like: DEFINE_FREE(acpi_table_mpam, struct acpi_table_header *, if (_T) acpi_put_table(_T)); That enables nice clean code like: static int __init acpi_mpam_parse(void) { struct acpi_table_header *mpam = __free(acpi_table_mpam) = get_table(); if (!mpam) return 0; return _parse_table; } This series was big enough that I'm spinning a single 'suggested changes' patch on top of it that includes stuff like this. Might take another day or so. Jonathan > > + return NULL; > > + > > + return table; > > +}
Hi Jonathan, On 28/07/2025 11:08, Jonathan Cameron wrote: >>> +static struct acpi_table_header *get_table(void) >>> +{ >>> + struct acpi_table_header *table; >>> + acpi_status status; >>> + >>> + if (acpi_disabled || !system_supports_mpam()) >>> + return NULL; >>> + >>> + status = acpi_get_table(ACPI_SIG_MPAM, 0, &table); >>> + if (ACPI_FAILURE(status)) >>> + return NULL; >>> + >>> + if (table->revision != 1) > > Missing an acpi_put_table() Oops, > I'm messing around with ACQUIRE() that is queued in the CXL tree > for the coming merge window and noticed this. > https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/log/?h=for-6.17/cleanup-acquire (not more C++!) > Interestingly this is a new corner case where we want conditional locking > style handling but with return_ptr() style handling. Maybe too much of a niche > to bother with infrastructure. > > Worth noting though that one layer up it is probably worth something like: > > DEFINE_FREE(acpi_table_mpam, struct acpi_table_header *, if (_T) acpi_put_table(_T)); > > That enables nice clean code like: > > > static int __init acpi_mpam_parse(void) > { > struct acpi_table_header *mpam = __free(acpi_table_mpam) = get_table(); > > if (!mpam) > return 0; > > return _parse_table; > } I've got bits of that from your PPTT suggestions. I ended up folding the get_table() helper in here. count_msc() gets the same treatment and the cleanup thing lets _count_msc() be folded into it. Thanks, James > This series was big enough that I'm spinning a single 'suggested changes' > patch on top of it that includes stuff like this. Might take another day or so. > > Jonathan
Hi James, Jonathan, On 7/16/25 18:07, Jonathan Cameron wrote: > On Fri, 11 Jul 2025 18:36:22 +0000 > James Morse <james.morse@arm.com> wrote: > >> Add code to parse the arm64 specific MPAM table, looking up the cache >> level from the PPTT and feeding the end result into the MPAM driver. > > Throw in a link to the spec perhaps? Particularly useful to know which > version this was written against when reviewing it. As I comment below this code checks the table revision is 1 and so we can assume it was written against version 2 of the spec. As of Monday, there is a new version hot off the press, https://developer.arm.com/documentation/den0065/3-0bet/?lang=en which introduces an "MMIO size" field to allow for disabled nodes. This should be considered here to avoid advertising msc that aren't present. > >> >> CC: Carl Worth <carl@os.amperecomputing.com> >> Signed-off-by: James Morse <james.morse@arm.com> >> --- >> arch/arm64/Kconfig | 1 + >> drivers/acpi/arm64/Kconfig | 3 + >> drivers/acpi/arm64/Makefile | 1 + >> drivers/acpi/arm64/mpam.c | 365 ++++++++++++++++++++++++++++++++++++ >> drivers/acpi/tables.c | 2 +- >> include/linux/arm_mpam.h | 46 +++++ >> 6 files changed, 417 insertions(+), 1 deletion(-) >> create mode 100644 drivers/acpi/arm64/mpam.c >> create mode 100644 include/linux/arm_mpam.h >> > >> diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile >> index 05ecde9eaabe..27b872249baa 100644 >> --- a/drivers/acpi/arm64/Makefile >> +++ b/drivers/acpi/arm64/Makefile >> @@ -6,5 +6,6 @@ obj-$(CONFIG_ACPI_GTDT) += gtdt.o >> obj-$(CONFIG_ACPI_IORT) += iort.o >> obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o >> obj-$(CONFIG_ARM_AMBA) += amba.o >> +obj-$(CONFIG_ACPI_MPAM) += mpam.o > > Keep it with the ACPI ones? There doesn't seem to be a lot of order in here > though so I guess maybe there is logic behind putting it here I'm missing. > >> obj-y += dma.o init.o >> obj-y += thermal_cpufreq.o >> diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c >> new file mode 100644 >> index 000000000000..f4791bac9a2a >> --- /dev/null >> +++ b/drivers/acpi/arm64/mpam.c >> @@ -0,0 +1,365 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +// Copyright (C) 2025 Arm Ltd. >> + >> +/* Parse the MPAM ACPI table feeding the discovered nodes into the driver */ >> + >> +#define pr_fmt(fmt) "ACPI MPAM: " fmt >> + >> +#include <linux/acpi.h> >> +#include <linux/arm_mpam.h> >> +#include <linux/cpu.h> >> +#include <linux/cpumask.h> >> +#include <linux/platform_device.h> >> + >> +#include <acpi/processor.h> >> + >> +/* Flags for acpi_table_mpam_msc.*_interrupt_flags */ > > References.. I'm looking at 3.0-alpha table 5 to check this. > I can see why you might be reluctant to point at an alpha if that > is what you are using ;) > > >> +#define ACPI_MPAM_MSC_IRQ_MODE_EDGE 1 >> +#define ACPI_MPAM_MSC_IRQ_TYPE_MASK (3 << 1) > > GENMASK(3, 2) would be my preference for how to do masks in new code. > >> +#define ACPI_MPAM_MSC_IRQ_TYPE_WIRED 0 >> +#define ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER BIT(3) >> +#define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID BIT(4) >> + >> +static bool frob_irq(struct platform_device *pdev, int intid, u32 flags, >> + int *irq, u32 processor_container_uid) >> +{ >> + int sense; >> + >> + if (!intid) >> + return false; >> + >> + /* 0 in this field indicates a wired interrupt */ >> + if (flags & ACPI_MPAM_MSC_IRQ_TYPE_MASK) > I'd prefer more explicit code (and probably no comment) > > if (FIELD_GET(flags, ACPI_MPAM_MSC_IRQ_TYPE_MASK) != > ACPI_MPAM_MSC_IRQ_TYPE_WIRED) > return false; > >> + return false; >> + >> + if (flags & ACPI_MPAM_MSC_IRQ_MODE_EDGE) >> + sense = ACPI_EDGE_SENSITIVE; >> + else >> + sense = ACPI_LEVEL_SENSITIVE; > > If the spec is supposed to be using standard ACPI_* types for this field > (I don't think the connection is explicitly documented though) then > > sense = FIELD_GET(flags, ACPI_MPAM_MSC_IRQ_MODE_MASK); > Assuming a change to define the mask and rely on the ACPI defs for the values > > This one is entirely up to you. > >> + >> + /* >> + * If the GSI is in the GIC's PPI range, try and create a partitioned >> + * percpu interrupt. >> + */ >> + if (16 <= intid && intid < 32 && processor_container_uid != ~0) { >> + pr_err_once("Partitioned interrupts not supported\n"); >> + return false; >> + } >> + >> + *irq = acpi_register_gsi(&pdev->dev, intid, sense, ACPI_ACTIVE_HIGH); >> + if (*irq <= 0) { >> + pr_err_once("Failed to register interrupt 0x%x with ACPI\n", >> + intid); >> + return false; >> + } >> + >> + return true; >> +} >> + >> +static void acpi_mpam_parse_irqs(struct platform_device *pdev, >> + struct acpi_mpam_msc_node *tbl_msc, >> + struct resource *res, int *res_idx) >> +{ >> + u32 flags, aff = ~0; >> + int irq; >> + >> + flags = tbl_msc->overflow_interrupt_flags; >> + if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID && >> + flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER) >> + aff = tbl_msc->overflow_interrupt_affinity; > Just to make the two cases look the same I'd do > > else > aff = ~0; > > here as well and not initialize above. It's not quite worth using > a helper function for these two identical blocks but it's close. > >> + if (frob_irq(pdev, tbl_msc->overflow_interrupt, flags, &irq, aff)) { >> + res[*res_idx].start = irq; >> + res[*res_idx].end = irq; >> + res[*res_idx].flags = IORESOURCE_IRQ; >> + res[*res_idx].name = "overflow"; > > res[*res_idx] = DEFINE_RES_IRQ_NAMED(irq, 1, "overflow"); >> + >> + (*res_idx)++; > Can roll this in as well. > >> + } >> + >> + flags = tbl_msc->error_interrupt_flags; >> + if (flags & ACPI_MPAM_MSC_IRQ_AFFINITY_VALID && >> + flags & ACPI_MPAM_MSC_IRQ_AFFINITY_PROCESSOR_CONTAINER) >> + aff = tbl_msc->error_interrupt_affinity; >> + else >> + aff = ~0; >> + if (frob_irq(pdev, tbl_msc->error_interrupt, flags, &irq, aff)) { >> + res[*res_idx].start = irq; >> + res[*res_idx].end = irq; >> + res[*res_idx].flags = IORESOURCE_IRQ; >> + res[*res_idx].name = "error"; > > Similar to above. > >> + >> + (*res_idx)++; >> + } >> +} >> + > > >> +static bool __init parse_msc_pm_link(struct acpi_mpam_msc_node *tbl_msc, >> + struct platform_device *pdev, >> + u32 *acpi_id) >> +{ >> + bool acpi_id_valid = false; >> + struct acpi_device *buddy; >> + char hid[16], uid[16]; >> + int err; >> + >> + memset(&hid, 0, sizeof(hid)); >> + memcpy(hid, &tbl_msc->hardware_id_linked_device, >> + sizeof(tbl_msc->hardware_id_linked_device)); >> + >> + if (!strcmp(hid, ACPI_PROCESSOR_CONTAINER_HID)) { >> + *acpi_id = tbl_msc->instance_id_linked_device; >> + acpi_id_valid = true; >> + } >> + >> + err = snprintf(uid, sizeof(uid), "%u", >> + tbl_msc->instance_id_linked_device); >> + if (err < 0 || err >= sizeof(uid)) > > Does snprintf() ever return < 0 ? It's documented as returning > number of chars printed (without the NULL) so that can only be 0 or > greater. > > Can it return >= sizeof(uid) ? Looks odd. > > + return acpi_id_valid; >> + >> + buddy = acpi_dev_get_first_match_dev(hid, uid, -1); >> + if (buddy) >> + device_link_add(&pdev->dev, &buddy->dev, DL_FLAG_STATELESS); >> + >> + return acpi_id_valid; >> +} > >> +static int __init _parse_table(struct acpi_table_header *table) >> +{ >> + char *table_end, *table_offset = (char *)(table + 1); >> + struct property_entry props[4]; /* needs a sentinel */ >> + struct acpi_mpam_msc_node *tbl_msc; >> + int next_res, next_prop, err = 0; >> + struct acpi_device *companion; >> + struct platform_device *pdev; >> + enum mpam_msc_iface iface; >> + struct resource res[3]; >> + char uid[16]; >> + u32 acpi_id; >> + >> + table_end = (char *)table + table->length; >> + >> + while (table_offset < table_end) { >> + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; >> + table_offset += tbl_msc->length; >> + >> + /* >> + * If any of the reserved fields are set, make no attempt to >> + * parse the msc structure. This will prevent the driver from >> + * probing all the MSC, meaning it can't discover the system >> + * wide supported partid and pmg ranges. This avoids whatever >> + * this MSC is truncating the partids and creating a screaming >> + * error interrupt. >> + */ >> + if (tbl_msc->reserved || tbl_msc->reserved1 || tbl_msc->reserved2) >> + continue; >> + >> + if (decode_interface_type(tbl_msc, &iface)) >> + continue; >> + >> + next_res = 0; >> + next_prop = 0; >> + memset(res, 0, sizeof(res)); >> + memset(props, 0, sizeof(props)); >> + >> + pdev = platform_device_alloc("mpam_msc", tbl_msc->identifier); >> + if (IS_ERR(pdev)) { > > returns NULL in at least some error cases (probably all, I'm just to lazy to check) > >> + err = PTR_ERR(pdev); >> + break; >> + } >> + >> + if (tbl_msc->length < sizeof(*tbl_msc)) { >> + err = -EINVAL; >> + break; >> + } >> + >> + /* Some power management is described in the namespace: */ >> + err = snprintf(uid, sizeof(uid), "%u", tbl_msc->identifier); >> + if (err > 0 && err < sizeof(uid)) { >> + companion = acpi_dev_get_first_match_dev("ARMHAA5C", uid, -1); >> + if (companion) >> + ACPI_COMPANION_SET(&pdev->dev, companion); >> + } >> + >> + if (iface == MPAM_IFACE_MMIO) { >> + res[next_res].name = "MPAM:MSC"; >> + res[next_res].start = tbl_msc->base_address; >> + res[next_res].end = tbl_msc->base_address + tbl_msc->mmio_size - 1; >> + res[next_res].flags = IORESOURCE_MEM; >> + next_res++; > > DEFINE_RES_MEM_NAMED()? > >> + } else if (iface == MPAM_IFACE_PCC) { >> + props[next_prop++] = PROPERTY_ENTRY_U32("pcc-channel", >> + tbl_msc->base_address); >> + next_prop++; >> + } >> + >> + acpi_mpam_parse_irqs(pdev, tbl_msc, res, &next_res); >> + err = platform_device_add_resources(pdev, res, next_res); >> + if (err) >> + break; >> + >> + props[next_prop++] = PROPERTY_ENTRY_U32("arm,not-ready-us", >> + tbl_msc->max_nrdy_usec); >> + >> + /* >> + * The MSC's CPU affinity is described via its linked power >> + * management device, but only if it points at a Processor or >> + * Processor Container. >> + */ >> + if (parse_msc_pm_link(tbl_msc, pdev, &acpi_id)) { >> + props[next_prop++] = PROPERTY_ENTRY_U32("cpu_affinity", >> + acpi_id); >> + } >> + >> + err = device_create_managed_software_node(&pdev->dev, props, >> + NULL); >> + if (err) >> + break; >> + >> + /* Come back later if you want the RIS too */ >> + err = platform_device_add_data(pdev, tbl_msc, tbl_msc->length); >> + if (err) >> + break; >> + >> + platform_device_add(pdev); > > Can fail. > >> + } >> + >> + if (err) >> + platform_device_put(pdev); >> + >> + return err; >> +} >> + >> +static struct acpi_table_header *get_table(void) >> +{ >> + struct acpi_table_header *table; >> + acpi_status status; >> + >> + if (acpi_disabled || !system_supports_mpam()) >> + return NULL; >> + >> + status = acpi_get_table(ACPI_SIG_MPAM, 0, &table); >> + if (ACPI_FAILURE(status)) >> + return NULL; >> + >> + if (table->revision != 1) >> + return NULL; Indicates that this was written against version 2 of the spec. >> + >> + return table; >> +} >> + >> +static int __init acpi_mpam_parse(void) >> +{ >> + struct acpi_table_header *mpam; >> + int err; >> + >> + mpam = get_table(); >> + if (!mpam) >> + return 0; > > Just what I was suggesting for the PPTT case in early patches. Nice :) > >> + >> + err = _parse_table(mpam); >> + acpi_put_table(mpam); >> + >> + return err; >> +} >> + >> +static int _count_msc(struct acpi_table_header *table) >> +{ >> + char *table_end, *table_offset = (char *)(table + 1); >> + struct acpi_mpam_msc_node *tbl_msc; >> + int ret = 0; > > Call it count as it only ever contains the count? > >> + >> + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; >> + table_end = (char *)table + table->length; >> + >> + while (table_offset < table_end) { >> + if (tbl_msc->length < sizeof(*tbl_msc)) >> + return -EINVAL; >> + >> + ret++; > > count++ would feel more natural here. > >> + >> + table_offset += tbl_msc->length; >> + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; >> + } >> + >> + return ret; >> +} > > That's all I have time for today. Will get to the rest of the series soonish. > > Jonathan > > Thanks, Ben
Hi Ben, On 23/07/2025 17:39, Ben Horgan wrote: > On 7/16/25 18:07, Jonathan Cameron wrote: >> On Fri, 11 Jul 2025 18:36:22 +0000 >> James Morse <james.morse@arm.com> wrote: >> >>> Add code to parse the arm64 specific MPAM table, looking up the cache >>> level from the PPTT and feeding the end result into the MPAM driver. >> >> Throw in a link to the spec perhaps? Particularly useful to know which >> version this was written against when reviewing it. > As I comment below this code checks the table revision is 1 and so we can assume it was > written against version 2 of the spec. As of Monday, there is a new version hot off the > press, > https://developer.arm.com/documentation/den0065/3-0bet/?lang=en which introduces an "MMIO > size" field to allow for disabled nodes. This should be considered here to avoid > advertising msc that aren't present. Sure. Bit of an unfortunate race with the spec people there! Added as: --------------------%<-------------------- diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c index 9ff5a6df9f1b..d8c6224a76f8 100644 --- a/drivers/acpi/arm64/mpam.c +++ b/drivers/acpi/arm64/mpam.c @@ -202,6 +202,9 @@ static int __init _parse_table(struct acpi_table_header *table) if (tbl_msc->reserved || tbl_msc->reserved1 || tbl_msc->reserved2) continue; + if (!tbl_msc->mmio_size) + continue; + if (decode_interface_type(tbl_msc, &iface)) continue; @@ -290,7 +293,7 @@ static struct acpi_table_header *get_table(void) if (ACPI_FAILURE(status)) return NULL; - if (table->revision != 1) + if (table->revision < 1) return NULL; return table; @@ -321,6 +324,9 @@ static int _count_msc(struct acpi_table_header *table) table_end = (char *)table + table->length; while (table_offset < table_end) { + if (!tbl_msc->mmio_size) + continue; + if (tbl_msc->length < sizeof(*tbl_msc)) return -EINVAL; --------------------%<-------------------- Amusingly, PCC also defines mmio_size==0 as disabled, so _count_msc() doesn't need to know what kind of thing this is. In principle they could change this as its beta, but a zero sized MSC should probably be treated as an error anyway. Thanks, James
Hi James, On 8/5/25 18:07, James Morse wrote: > Hi Ben, > > On 23/07/2025 17:39, Ben Horgan wrote: >> On 7/16/25 18:07, Jonathan Cameron wrote: >>> On Fri, 11 Jul 2025 18:36:22 +0000 >>> James Morse <james.morse@arm.com> wrote: >>> >>>> Add code to parse the arm64 specific MPAM table, looking up the cache >>>> level from the PPTT and feeding the end result into the MPAM driver. >>> >>> Throw in a link to the spec perhaps? Particularly useful to know which >>> version this was written against when reviewing it. > >> As I comment below this code checks the table revision is 1 and so we can assume it was >> written against version 2 of the spec. As of Monday, there is a new version hot off the >> press, >> https://developer.arm.com/documentation/den0065/3-0bet/?lang=en which introduces an "MMIO >> size" field to allow for disabled nodes. This should be considered here to avoid >> advertising msc that aren't present. > > Sure. Bit of an unfortunate race with the spec people there! > > Added as: > --------------------%<-------------------- > diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c > index 9ff5a6df9f1b..d8c6224a76f8 100644 > --- a/drivers/acpi/arm64/mpam.c > +++ b/drivers/acpi/arm64/mpam.c > @@ -202,6 +202,9 @@ static int __init _parse_table(struct acpi_table_header *table) > if (tbl_msc->reserved || tbl_msc->reserved1 || tbl_msc->reserved2) > continue; > > + if (!tbl_msc->mmio_size) > + continue; > + > if (decode_interface_type(tbl_msc, &iface)) > continue; > > @@ -290,7 +293,7 @@ static struct acpi_table_header *get_table(void) > if (ACPI_FAILURE(status)) > return NULL; > > - if (table->revision != 1) > + if (table->revision < 1) > return NULL; > > return table; > @@ -321,6 +324,9 @@ static int _count_msc(struct acpi_table_header *table) > table_end = (char *)table + table->length; > > while (table_offset < table_end) { > + if (!tbl_msc->mmio_size) > + continue; > + > if (tbl_msc->length < sizeof(*tbl_msc)) > return -EINVAL; > --------------------%<-------------------- This seems fine as long as any later table revisions are guaranteed to be backwards compatible. > > Amusingly, PCC also defines mmio_size==0 as disabled, so _count_msc() doesn't need to know > what kind of thing this is. In principle they could change this as its beta, but a zero > sized MSC should probably be treated as an error anyway. > > > Thanks, > > James Thanks, Ben
© 2016 - 2025 Red Hat, Inc.