Perf and resctrl users need an enumeration of which memory addresses
are bound to which "region" tag.
Parse the ACPI MRRM table and add /sys entries for each memory range
describing base address, length, NUMA node, and which region tags apply
for same-socket and cross-socket access.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
drivers/acpi/acpi_mrrm.c | 143 ++++++++++++++++++++++++++++++++++++++-
1 file changed, 142 insertions(+), 1 deletion(-)
diff --git a/drivers/acpi/acpi_mrrm.c b/drivers/acpi/acpi_mrrm.c
index ab8022e58da5..f04645a0407f 100644
--- a/drivers/acpi/acpi_mrrm.c
+++ b/drivers/acpi/acpi_mrrm.c
@@ -3,12 +3,16 @@
* Copyright (c) 2025, Intel Corporation.
*
* Memory Range and Region Mapping (MRRM) structure
+ *
+ * Parse and report the platform's MRRM table in /sys.
*/
#define pr_fmt(fmt) "acpi/mrrm: " fmt
#include <linux/acpi.h>
#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
static int max_mem_region = -ENOENT;
@@ -18,25 +22,162 @@ int acpi_mrrm_max_mem_region(void)
return max_mem_region;
}
+struct mrrm_mem_range_entry {
+ u64 base;
+ u64 length;
+ int node;
+ u8 local_region_id;
+ u8 remote_region_id;
+};
+
+static struct mrrm_mem_range_entry *mrrm_mem_range_entry;
+static u32 mrrm_mem_entry_num;
+
+static int get_node_num(struct mrrm_mem_range_entry *e)
+{
+ unsigned int nid;
+
+ for_each_online_node(nid) {
+ for (int z = 0; z < MAX_NR_ZONES; z++) {
+ struct zone *zone = NODE_DATA(nid)->node_zones + z;
+
+ if (!populated_zone(zone))
+ continue;
+ if (zone_intersects(zone, PHYS_PFN(e->base), PHYS_PFN(e->length)))
+ return zone_to_nid(zone);
+ }
+ }
+
+ return -ENOENT;
+}
+
static __init int acpi_parse_mrrm(struct acpi_table_header *table)
{
+ struct acpi_mrrm_mem_range_entry *mre_entry;
struct acpi_table_mrrm *mrrm;
+ void *mre, *mrrm_end;
+ int mre_count = 0;
mrrm = (struct acpi_table_mrrm *)table;
if (!mrrm)
return -ENODEV;
+ if (mrrm->flags & ACPI_MRRM_FLAGS_REGION_ASSIGNMENT_OS)
+ return -EOPNOTSUPP;
+
+ mrrm_end = (void *)mrrm + mrrm->header.length - 1;
+ mre = (void *)mrrm + sizeof(struct acpi_table_mrrm);
+ while (mre < mrrm_end) {
+ mre_entry = mre;
+ mre_count++;
+ mre += mre_entry->header.length;
+ }
+ if (!mre_count) {
+ pr_info(FW_BUG "No ranges listed in MRRM table\n");
+ return -EINVAL;
+ }
+
+ mrrm_mem_range_entry = kmalloc_array(mre_count, sizeof(*mrrm_mem_range_entry),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!mrrm_mem_range_entry)
+ return -ENOMEM;
+
+ mre = (void *)mrrm + sizeof(struct acpi_table_mrrm);
+ while (mre < mrrm_end) {
+ struct mrrm_mem_range_entry *e;
+
+ mre_entry = mre;
+ e = mrrm_mem_range_entry + mrrm_mem_entry_num;
+
+ e->base = mre_entry->addr_base;
+ e->length = mre_entry->addr_len;
+ e->node = get_node_num(e);
+
+ if (mre_entry->region_id_flags & ACPI_MRRM_VALID_REGION_ID_FLAGS_LOCAL)
+ e->local_region_id = mre_entry->local_region_id;
+ else
+ e->local_region_id = -1;
+ if (mre_entry->region_id_flags & ACPI_MRRM_VALID_REGION_ID_FLAGS_REMOTE)
+ e->remote_region_id = mre_entry->remote_region_id;
+ else
+ e->remote_region_id = -1;
+
+ mrrm_mem_entry_num++;
+ mre += mre_entry->header.length;
+ }
+
max_mem_region = mrrm->max_mem_region;
return 0;
}
+#define RANGE_ATTR(name, fmt) \
+static ssize_t name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buf) \
+{ \
+ struct mrrm_mem_range_entry *mre; \
+ const char *kname = kobject_name(kobj); \
+ int n, ret; \
+ \
+ ret = kstrtoint(kname + 5, 10, &n); \
+ if (ret) \
+ return ret; \
+ \
+ mre = mrrm_mem_range_entry + n; \
+ \
+ return sysfs_emit(buf, fmt, mre->name); \
+} \
+static struct kobj_attribute name##_attr = __ATTR_RO(name)
+
+RANGE_ATTR(base, "0x%llx\n");
+RANGE_ATTR(length, "0x%llx\n");
+RANGE_ATTR(node, "%d\n");
+RANGE_ATTR(local_region_id, "%d\n");
+RANGE_ATTR(remote_region_id, "%d\n");
+
+static struct attribute *memory_range_attrs[] = {
+ &base_attr.attr,
+ &length_attr.attr,
+ &node_attr.attr,
+ &local_region_id_attr.attr,
+ &remote_region_id_attr.attr,
+ NULL
+};
+
+ATTRIBUTE_GROUPS(memory_range);
+
+static __init int add_boot_memory_ranges(void)
+{
+ struct kobject *pkobj, *kobj;
+ int ret = -EINVAL;
+ char *name;
+
+ pkobj = kobject_create_and_add("memory_ranges", acpi_kobj);
+
+ for (int i = 0; i < mrrm_mem_entry_num; i++) {
+ name = kasprintf(GFP_KERNEL, "range%d", i);
+ if (!name)
+ break;
+
+ kobj = kobject_create_and_add(name, pkobj);
+
+ ret = sysfs_create_groups(kobj, memory_range_groups);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
static __init int mrrm_init(void)
{
int ret;
ret = acpi_table_parse(ACPI_SIG_MRRM, acpi_parse_mrrm);
- return ret;
+ if (ret < 0)
+ return ret;
+
+ return add_boot_memory_ranges();
}
device_initcall(mrrm_init);
--
2.48.1
Hi, Tony,
On 4/29/25 13:24, Tony Luck wrote:
> Perf and resctrl users need an enumeration of which memory addresses
> are bound to which "region" tag.
>
> Parse the ACPI MRRM table and add /sys entries for each memory range
> describing base address, length, NUMA node, and which region tags apply
> for same-socket and cross-socket access.
>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
> drivers/acpi/acpi_mrrm.c | 143 ++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 142 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/acpi/acpi_mrrm.c b/drivers/acpi/acpi_mrrm.c
> index ab8022e58da5..f04645a0407f 100644
> --- a/drivers/acpi/acpi_mrrm.c
> +++ b/drivers/acpi/acpi_mrrm.c
> @@ -3,12 +3,16 @@
> * Copyright (c) 2025, Intel Corporation.
> *
> * Memory Range and Region Mapping (MRRM) structure
> + *
> + * Parse and report the platform's MRRM table in /sys.
> */
>
> #define pr_fmt(fmt) "acpi/mrrm: " fmt
>
> #include <linux/acpi.h>
> #include <linux/init.h>
> +#include <linux/string.h>
> +#include <linux/sysfs.h>
>
> static int max_mem_region = -ENOENT;
>
> @@ -18,25 +22,162 @@ int acpi_mrrm_max_mem_region(void)
> return max_mem_region;
> }
>
> +struct mrrm_mem_range_entry {
> + u64 base;
> + u64 length;
> + int node;
> + u8 local_region_id;
> + u8 remote_region_id;
> +};
> +
> +static struct mrrm_mem_range_entry *mrrm_mem_range_entry;
> +static u32 mrrm_mem_entry_num;
> +
> +static int get_node_num(struct mrrm_mem_range_entry *e)
> +{
> + unsigned int nid;
> +
> + for_each_online_node(nid) {
> + for (int z = 0; z < MAX_NR_ZONES; z++) {
> + struct zone *zone = NODE_DATA(nid)->node_zones + z;
> +
> + if (!populated_zone(zone))
> + continue;
> + if (zone_intersects(zone, PHYS_PFN(e->base), PHYS_PFN(e->length)))
> + return zone_to_nid(zone);
> + }
> + }
> +
> + return -ENOENT;
> +}
> +
> static __init int acpi_parse_mrrm(struct acpi_table_header *table)
> {
> + struct acpi_mrrm_mem_range_entry *mre_entry;
> struct acpi_table_mrrm *mrrm;
> + void *mre, *mrrm_end;
> + int mre_count = 0;
>
> mrrm = (struct acpi_table_mrrm *)table;
> if (!mrrm)
> return -ENODEV;
>
> + if (mrrm->flags & ACPI_MRRM_FLAGS_REGION_ASSIGNMENT_OS)
> + return -EOPNOTSUPP;
> +
> + mrrm_end = (void *)mrrm + mrrm->header.length - 1;
> + mre = (void *)mrrm + sizeof(struct acpi_table_mrrm);
> + while (mre < mrrm_end) {
> + mre_entry = mre;
> + mre_count++;
> + mre += mre_entry->header.length;
> + }
> + if (!mre_count) {
> + pr_info(FW_BUG "No ranges listed in MRRM table\n");
> + return -EINVAL;
> + }
> +
> + mrrm_mem_range_entry = kmalloc_array(mre_count, sizeof(*mrrm_mem_range_entry),
> + GFP_KERNEL | __GFP_ZERO);
> + if (!mrrm_mem_range_entry)
> + return -ENOMEM;
> +
> + mre = (void *)mrrm + sizeof(struct acpi_table_mrrm);
> + while (mre < mrrm_end) {
> + struct mrrm_mem_range_entry *e;
> +
> + mre_entry = mre;
> + e = mrrm_mem_range_entry + mrrm_mem_entry_num;
> +
> + e->base = mre_entry->addr_base;
> + e->length = mre_entry->addr_len;
> + e->node = get_node_num(e);
> +
> + if (mre_entry->region_id_flags & ACPI_MRRM_VALID_REGION_ID_FLAGS_LOCAL)
> + e->local_region_id = mre_entry->local_region_id;
> + else
> + e->local_region_id = -1;
> + if (mre_entry->region_id_flags & ACPI_MRRM_VALID_REGION_ID_FLAGS_REMOTE)
> + e->remote_region_id = mre_entry->remote_region_id;
> + else
> + e->remote_region_id = -1;
> +
> + mrrm_mem_entry_num++;
> + mre += mre_entry->header.length;
> + }
> +
> max_mem_region = mrrm->max_mem_region;
>
> return 0;
> }
>
> +#define RANGE_ATTR(name, fmt) \
> +static ssize_t name##_show(struct kobject *kobj, \
"name" is used as a macro parameter. But "name" is also used as a
variable mre->name in the macro. checkpatch complains this kind of usage.
Maybe change the parameter "name" as something like "range_name" to
avoid the potential confusion?
> + struct kobj_attribute *attr, char *buf) \
> +{ \
> + struct mrrm_mem_range_entry *mre; \
> + const char *kname = kobject_name(kobj); \
> + int n, ret; \
> + \
> + ret = kstrtoint(kname + 5, 10, &n); \
> + if (ret) \
> + return ret; \
> + \
> + mre = mrrm_mem_range_entry + n; \
> + \
> + return sysfs_emit(buf, fmt, mre->name); \
> +} \
> +static struct kobj_attribute name##_attr = __ATTR_RO(name)
> +
> +RANGE_ATTR(base, "0x%llx\n");
> +RANGE_ATTR(length, "0x%llx\n");
> +RANGE_ATTR(node, "%d\n");
> +RANGE_ATTR(local_region_id, "%d\n");
> +RANGE_ATTR(remote_region_id, "%d\n");
> +
> +static struct attribute *memory_range_attrs[] = {
> + &base_attr.attr,
> + &length_attr.attr,
> + &node_attr.attr,
> + &local_region_id_attr.attr,
> + &remote_region_id_attr.attr,
> + NULL
> +};
> +
> +ATTRIBUTE_GROUPS(memory_range);
> +
> +static __init int add_boot_memory_ranges(void)
> +{
> + struct kobject *pkobj, *kobj;
> + int ret = -EINVAL;
> + char *name;
> +
> + pkobj = kobject_create_and_add("memory_ranges", acpi_kobj);
> +
> + for (int i = 0; i < mrrm_mem_entry_num; i++) {
> + name = kasprintf(GFP_KERNEL, "range%d", i);
> + if (!name)
> + break;
> +
> + kobj = kobject_create_and_add(name, pkobj);
> +
> + ret = sysfs_create_groups(kobj, memory_range_groups);
> + if (ret)
> + return ret;
> + }
> +
> + return ret;
> +}
> +
> static __init int mrrm_init(void)
> {
> int ret;
>
> ret = acpi_table_parse(ACPI_SIG_MRRM, acpi_parse_mrrm);
>
This blank line seems redundant. Maybe remove it so that the "if (ret <
0)" sentence follows the "ret = ...." sentence immediately?
> - return ret;
> + if (ret < 0)
> + return ret;
> +
> + return add_boot_memory_ranges();
> }
> device_initcall(mrrm_init);
Thanks.
-Fenghua
On Sun, May 04, 2025 at 11:23:50PM -0700, Fenghua Yu wrote:
> Hi, Tony,
> > +#define RANGE_ATTR(name, fmt) \
> > +static ssize_t name##_show(struct kobject *kobj, \
>
> "name" is used as a macro parameter. But "name" is also used as a variable
> mre->name in the macro. checkpatch complains this kind of usage.
The checkpatch complaint is that is is used twice. Once as "mre->name"
(as you noted), but also as "__ATTR_RO(name)" two lines after.
Checkpatch is worried that the macor might be invoked with an argument
that has side effects ("foo++", or return from function call "baz()")
which would result in the side-effects happening twice.
It's a false positive in this case because:
1) This macro is only used with a simple argument (and can only ever be
used in that way.
2) It's used for a static initialization of compile time, so a 2nd
reason why side-effects from an argment are not possible.
> Maybe change the parameter "name" as something like "range_name" to avoid
> the potential confusion?
>
> > + struct kobj_attribute *attr, char *buf) \
> > +{ \
> > + struct mrrm_mem_range_entry *mre; \
> > + const char *kname = kobject_name(kobj); \
> > + int n, ret; \
> > + \
> > + ret = kstrtoint(kname + 5, 10, &n); \
> > + if (ret) \
> > + return ret; \
> > + \
> > + mre = mrrm_mem_range_entry + n; \
> > + \
> > + return sysfs_emit(buf, fmt, mre->name); \
> > +} \
> > +static struct kobj_attribute name##_attr = __ATTR_RO(name)
> > +
> > +RANGE_ATTR(base, "0x%llx\n");
> > +RANGE_ATTR(length, "0x%llx\n");
> > +RANGE_ATTR(node, "%d\n");
> > +RANGE_ATTR(local_region_id, "%d\n");
> > +RANGE_ATTR(remote_region_id, "%d\n");
...
> > +
> > static __init int mrrm_init(void)
> > {
> > int ret;
> > ret = acpi_table_parse(ACPI_SIG_MRRM, acpi_parse_mrrm);
> This blank line seems redundant. Maybe remove it so that the "if (ret < 0)"
> sentence follows the "ret = ...." sentence immediately?
Agreed. I will delete in next version.
> > - return ret;
> > + if (ret < 0)
> > + return ret;
> > +
> > + return add_boot_memory_ranges();
> > }
> > device_initcall(mrrm_init);
>
> Thanks.
>
> -Fenghua
Thanks for the review.
-Tony
>
© 2016 - 2026 Red Hat, Inc.