[PATCH 2/4] ACPI/MRRM: Create /sys/devices/system/memory/rangeX ABI

Tony Luck posted 4 patches 12 months ago
There is a newer version of this series
[PATCH 2/4] ACPI/MRRM: Create /sys/devices/system/memory/rangeX ABI
Posted by Tony Luck 12 months ago
Perf and resctrl users need an enumeration of which memory addresses
are bound to which "region" tag.

Parse the ACPI MRRM table and add /sys entries for each memory range
describing base address, length, and which region tags apply for
same-socket and cross-socket access.

[Derived from code developed by Fenghua Yu <fenghua.yu@intel.com>]

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/memory.h   |   9 +++
 drivers/acpi/acpi_mrrm.c | 159 +++++++++++++++++++++++++++++++++++++++
 drivers/base/memory.c    |   9 +++
 arch/x86/Kconfig         |   1 +
 drivers/acpi/Kconfig     |   4 +
 drivers/acpi/Makefile    |   1 +
 6 files changed, 183 insertions(+)
 create mode 100644 drivers/acpi/acpi_mrrm.c

diff --git a/include/linux/memory.h b/include/linux/memory.h
index c0afee5d126e..0a21943ce44d 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -189,4 +189,13 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
  */
 extern struct mutex text_mutex;
 
+#ifdef CONFIG_ACPI_MRRM
+int mrrm_max_mem_region(void);
+int memory_subsys_device_register(struct device *dev);
+#else
+static inline int mrrm_max_mem_region(void) { return -EONENT; }
+static inline int memory_subsys_device_register(struct device *dev) { return -EINVAL; }
+#define memory_subsys_device_register memory_subsys_device_register
+#endif
+
 #endif /* _LINUX_MEMORY_H_ */
diff --git a/drivers/acpi/acpi_mrrm.c b/drivers/acpi/acpi_mrrm.c
new file mode 100644
index 000000000000..51ed9064e025
--- /dev/null
+++ b/drivers/acpi/acpi_mrrm.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025, Intel Corporation.
+ *
+ * Memory Range and Region Mapping (MRRM) structure
+ *
+ * Parse and report the platform's MRRM table in /sys.
+ */
+
+#define pr_fmt(fmt) "acpi/mrrm: " fmt
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/memory.h>
+#include <linux/sysfs.h>
+
+static int max_mem_region = -ENOENT;
+
+/* Access for use by resctrl file system */
+int mrrm_max_mem_region(void)
+{
+	return max_mem_region;
+}
+
+struct mrrm_mem_range_entry {
+	struct device dev;
+	u64 base;
+	u64 length;
+	u8  local_region_id;
+	u8  remote_region_id;
+};
+
+static struct mrrm_mem_range_entry *mrrm_mem_range_entry;
+static u32 mrrm_mem_entry_num;
+
+static __init int acpi_parse_mrrm(struct acpi_table_header *table)
+{
+	struct acpi_table_mrrm_mem_range_entry *mre_entry;
+	struct acpi_table_mrrm *mrrm;
+	void *mre, *mrrm_end;
+	int mre_count = 0;
+
+	mrrm = (struct acpi_table_mrrm *)table;
+	if (!mrrm)
+		return -ENODEV;
+
+	if (mrrm->flags & ACPI_MRRM_FLAGS_REGION_ASSIGNMENT_OS)
+		return -EOPNOTSUPP;
+
+	mrrm_end = (void *)mrrm + mrrm->header.length - 1;
+	mre = (void *)mrrm + sizeof(struct acpi_table_mrrm);
+	while (mre < mrrm_end) {
+		mre_entry = mre;
+		mre_count++;
+		mre += mre_entry->length;
+	}
+	if (!mre_count) {
+		pr_info(FW_BUG "No ranges listed in MRRM table\n");
+		return -EINVAL;
+	}
+
+	mrrm_mem_range_entry = kmalloc_array(mre_count, sizeof(*mrrm_mem_range_entry),
+					     GFP_KERNEL | __GFP_ZERO);
+	if (!mrrm_mem_range_entry)
+		return -ENOMEM;
+
+	mre = (void *)mrrm + sizeof(struct acpi_table_mrrm);
+	while (mre < mrrm_end) {
+		struct mrrm_mem_range_entry *e;
+
+		mre_entry = mre;
+		e = mrrm_mem_range_entry + mrrm_mem_entry_num;
+
+		e->base = ((u64)mre_entry->base_addr_high << 32) + mre_entry->base_addr_low;
+		e->length = ((u64)mre_entry->len_high << 32) + mre_entry->len_low;
+
+		if (mre_entry->region_id_flags & ACPI_MRRM_VALID_REGION_ID_FLAGS_LOCAL)
+			e->local_region_id = mre_entry->local_region_id;
+		else
+			e->local_region_id = -1;
+		if (mre_entry->region_id_flags & ACPI_MRRM_VALID_REGION_ID_FLAGS_REMOTE)
+			e->remote_region_id = mre_entry->remote_region_id;
+		else
+			e->remote_region_id = -1;
+
+		mrrm_mem_entry_num++;
+		mre += mre_entry->length;
+	}
+
+	max_mem_region = mrrm->max_mem_region;
+
+	return 0;
+}
+
+#define RANGE_ATTR(name)						\
+static ssize_t name##_show(struct device *dev,				\
+			  struct device_attribute *attr, char *buf)	\
+{									\
+	struct mrrm_mem_range_entry *mre;				\
+									\
+	mre = container_of(dev, struct mrrm_mem_range_entry, dev);	\
+	return sysfs_emit(buf, "0x%lx\n", (unsigned long)mre->name);	\
+}									\
+static DEVICE_ATTR_RO(name)
+
+RANGE_ATTR(base);
+RANGE_ATTR(length);
+RANGE_ATTR(local_region_id);
+RANGE_ATTR(remote_region_id);
+
+static struct attribute *memory_range_attrs[] = {
+	&dev_attr_base.attr,
+	&dev_attr_length.attr,
+	&dev_attr_local_region_id.attr,
+	&dev_attr_remote_region_id.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(memory_range);
+
+static __init int add_boot_memory_ranges(void)
+{
+	char name[16];
+	int i, ret;
+
+	for (i = 0; i < mrrm_mem_entry_num; i++) {
+		struct mrrm_mem_range_entry *entry;
+
+		entry = mrrm_mem_range_entry + i;
+
+		sprintf(name, "range%d", i);
+		entry->dev.init_name = name;
+
+		entry->dev.id = i;
+		entry->dev.groups = memory_range_groups;
+
+		ret = memory_subsys_device_register(&entry->dev);
+		if (ret) {
+			put_device(&entry->dev);
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static __init int mrrm_init(void)
+{
+	int ret;
+
+	ret = acpi_table_parse(ACPI_SIG_MRRM, acpi_parse_mrrm);
+
+	if (ret < 0)
+		return ret;
+
+	return add_boot_memory_ranges();
+}
+device_initcall(mrrm_init);
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 348c5dbbfa68..1f7853a4df5c 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -699,6 +699,15 @@ static int __add_memory_block(struct memory_block *memory)
 	return ret;
 }
 
+#ifndef memory_subsys_device_register
+int memory_subsys_device_register(struct device *dev)
+{
+	dev->bus = &memory_subsys;
+
+	return device_register(dev);
+}
+#endif
+
 static struct zone *early_node_zone_for_memory_block(struct memory_block *mem,
 						     int nid)
 {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 87198d957e2f..96aa73e8fb13 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -36,6 +36,7 @@ config X86_64
 	select ARCH_HAS_ELFCORE_COMPAT
 	select ZONE_DMA32
 	select EXECMEM if DYNAMIC_FTRACE
+	select ACPI_MRRM			if MEMORY_HOTPLUG
 
 config FORCE_DYNAMIC_FTRACE
 	def_bool y
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index d81b55f5068c..c3d1b0217e99 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -576,6 +576,10 @@ config ACPI_FFH
 	  Enable this feature if you want to set up and install the FFH Address
 	  Space handler to handle FFH OpRegion in the firmware.
 
+config ACPI_MRRM
+	bool
+	depends on MEMORY_HOTPLUG
+
 source "drivers/acpi/pmic/Kconfig"
 
 config ACPI_VIOT
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 40208a0f5dfb..5092b518fc9b 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -62,6 +62,7 @@ acpi-$(CONFIG_ACPI_WATCHDOG)	+= acpi_watchdog.o
 acpi-$(CONFIG_ACPI_PRMT)	+= prmt.o
 acpi-$(CONFIG_ACPI_PCC)		+= acpi_pcc.o
 acpi-$(CONFIG_ACPI_FFH)		+= acpi_ffh.o
+acpi-$(CONFIG_ACPI_MRRM)	+= acpi_mrrm.o
 
 # Address translation
 acpi-$(CONFIG_ACPI_ADXL)	+= acpi_adxl.o
-- 
2.48.1
Re: [PATCH 2/4] ACPI/MRRM: Create /sys/devices/system/memory/rangeX ABI
Posted by David Hildenbrand 12 months ago
On 10.02.25 22:12, Tony Luck wrote:
> Perf and resctrl users need an enumeration of which memory addresses
> are bound to which "region" tag.
> 
> Parse the ACPI MRRM table and add /sys entries for each memory range
> describing base address, length, and which region tags apply for
> same-socket and cross-socket access.

How does an example in /sys/devices/system/memory/ look like later?

 From a quick glimpse, I am not sure if this really belongs into 
/sys/devices/system/memory/, but I am missing some information in cover 
letter / patch.

-- 
Cheers,

David / dhildenb
RE: [PATCH 2/4] ACPI/MRRM: Create /sys/devices/system/memory/rangeX ABI
Posted by Luck, Tony 12 months ago
> diff --git a/include/linux/memory.h b/include/linux/memory.h
> index c0afee5d126e..0a21943ce44d 100644
> --- a/include/linux/memory.h
> +++ b/include/linux/memory.h
> @@ -189,4 +189,13 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
>   */
>  extern struct mutex text_mutex;
>
> +#ifdef CONFIG_ACPI_MRRM
> +int mrrm_max_mem_region(void);
> +int memory_subsys_device_register(struct device *dev);
> +#else
> +static inline int mrrm_max_mem_region(void) { return -EONENT; }

The lkp robot just pointed out my spelling error. Should be ENOENT.

> +static inline int memory_subsys_device_register(struct device *dev) { return -EINVAL; }
> +#define memory_subsys_device_register memory_subsys_device_register
> +#endif
> +
>  #endif /* _LINUX_MEMORY_H_ */

-Tony