Provide a new interface for dynamic configuration and deconfiguration of
hotplug memory, with or without memmap_on_memory support. It is a
follow-up to the discussion with David when introducing memmap_on_memory
support for s390, about supporting dynamic (de)configuration of memory:
https://lore.kernel.org/all/ee492da8-74b4-4a97-8b24-73e07257f01d@redhat.com/
https://lore.kernel.org/all/20241202082732.3959803-1-sumanthk@linux.ibm.com/
The original motivation for introducing memmap_on_memory on s390 was to
avoid using online memory to store struct page metadata, particularly
for standby memory blocks. This became critical in cases where there was
an imbalance between standby and online memory, potentially leading to
boot failures due to insufficient memory for metadata allocation.
To address this, memmap_on_memory was utilized on s390. However, in its
current form, it adds struct page metadata at the start of each memory
block at the time of addition, and this configuration is static: it
cannot be changed at runtime (for example, when the user needs
contiguous physical memory).
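To make the overhead concrete (assuming 4K pages and a 64-byte struct
page, the common case), the memmap consumes 1/64 of each block, i.e.
about 2M at the start of every 128M memory block:

    128M / 4K = 32768 pages per block
    32768 pages * 64 bytes per struct page = 2M of memmap

With memmap_on_memory, those 2M come out of the hotplugged block itself
instead of out of already-online memory.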
In order to provide more flexibility to the user and overcome the above
limitation, add an option to dynamically configure and deconfigure
hotpluggable memory blocks with or without memmap_on_memory.
With the new interface, s390 will not add all possible hotplug memory in
advance, as it did before, to make it visible in sysfs for online/offline
actions. Instead, before a memory block can be set online, it has to be
configured via a new interface in /sys/firmware/memory/memoryX/config,
which makes s390 similar to other architectures, i.e. adding
hotpluggable memory is controlled by the user instead of being done at
boot time.
The s390 kernel sysfs interface to configure and deconfigure memory is
as follows (considering the upcoming lsmem changes):
* Initial memory layout:
lsmem -o RANGE,SIZE,STATE,BLOCK,CONFIGURED,MEMMAP_ON_MEMORY
RANGE SIZE STATE BLOCK CONFIGURED MEMMAP_ON_MEMORY
0x00000000-0x7fffffff 2G online 0-15 yes no
0x80000000-0xffffffff 2G offline 16-31 no yes
* Configure memory
sys="/sys"
echo 1 > $sys/firmware/memory/memory16/config
lsmem -o RANGE,SIZE,STATE,BLOCK,CONFIGURED,MEMMAP_ON_MEMORY
RANGE SIZE STATE BLOCK CONFIGURED MEMMAP_ON_MEMORY
0x00000000-0x7fffffff 2G online 0-15 yes no
0x80000000-0x87ffffff 128M offline 16 yes yes
0x88000000-0xffffffff 1.9G offline 17-31 no yes
* Deconfigure memory
echo 0 > $sys/firmware/memory/memory16/config
lsmem -o RANGE,SIZE,STATE,BLOCK,CONFIGURED,MEMMAP_ON_MEMORY
RANGE SIZE STATE BLOCK CONFIGURED MEMMAP_ON_MEMORY
0x00000000-0x7fffffff 2G online 0-15 yes no
0x80000000-0xffffffff 2G offline 16-31 no yes
* Enable memmap_on_memory and online it
echo 0 > $sys/devices/system/memory/memory5/online
echo 0 > $sys/firmware/memory/memory5/config
lsmem -o RANGE,SIZE,STATE,BLOCK,CONFIGURED,MEMMAP_ON_MEMORY
RANGE SIZE STATE BLOCK CONFIGURED MEMMAP_ON_MEMORY
0x00000000-0x27ffffff 640M online 0-4 yes no
0x28000000-0x2fffffff 128M offline 5 no no
0x30000000-0x7fffffff 1.3G online 6-15 yes no
0x80000000-0xffffffff 2G offline 16-31 no yes
echo 1 > $sys/firmware/memory/memory5/memmap_on_memory
echo 1 > $sys/firmware/memory/memory5/config
echo 1 > $sys/devices/system/memory/memory5/online
lsmem -o RANGE,SIZE,STATE,BLOCK,CONFIGURED,MEMMAP_ON_MEMORY
RANGE SIZE STATE BLOCK CONFIGURED MEMMAP_ON_MEMORY
0x00000000-0x27ffffff 640M online 0-4 yes no
0x28000000-0x2fffffff 128M online 5 yes yes
0x30000000-0x7fffffff 1.3G online 6-15 yes no
0x80000000-0xffffffff 2G offline 16-31 no yes
* Disable memmap_on_memory and online it
echo 0 > $sys/devices/system/memory/memory5/online
echo 0 > $sys/firmware/memory/memory5/config
lsmem -o RANGE,SIZE,STATE,BLOCK,CONFIGURED,MEMMAP_ON_MEMORY
RANGE SIZE STATE BLOCK CONFIGURED MEMMAP_ON_MEMORY
0x00000000-0x27ffffff 640M online 0-4 yes no
0x28000000-0x2fffffff 128M offline 5 no yes
0x30000000-0x7fffffff 1.3G online 6-15 yes no
0x80000000-0xffffffff 2G offline 16-31 no yes
echo 0 > $sys/firmware/memory/memory5/memmap_on_memory
echo 1 > $sys/firmware/memory/memory5/config
echo 1 > $sys/devices/system/memory/memory5/online
lsmem -o RANGE,SIZE,STATE,BLOCK,CONFIGURED,MEMMAP_ON_MEMORY
RANGE SIZE STATE BLOCK CONFIGURED MEMMAP_ON_MEMORY
0x00000000-0x7fffffff 2G online 0-15 yes no
0x80000000-0xffffffff 2G offline 16-31 no yes
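As a usage sketch (a hypothetical loop, assuming the sysfs layout shown
above), all remaining deconfigured blocks could be configured with
memmap_on_memory in one go:

for d in $sys/firmware/memory/memory*; do
        if [ "$(cat $d/config)" = "0" ]; then
                echo 1 > $d/memmap_on_memory
                echo 1 > $d/config
        fi
done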
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
---
drivers/s390/char/sclp_mem.c | 291 +++++++++++++++++++++++++++++------
1 file changed, 241 insertions(+), 50 deletions(-)
diff --git a/drivers/s390/char/sclp_mem.c b/drivers/s390/char/sclp_mem.c
index 27f49f5fd358..802439230294 100644
--- a/drivers/s390/char/sclp_mem.c
+++ b/drivers/s390/char/sclp_mem.c
@@ -9,9 +9,12 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/cpufeature.h>
+#include <linux/container_of.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/kstrtox.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
@@ -27,7 +30,6 @@
#define SCLP_CMDW_ASSIGN_STORAGE 0x000d0001
#define SCLP_CMDW_UNASSIGN_STORAGE 0x000c0001
-static DEFINE_MUTEX(sclp_mem_mutex);
static LIST_HEAD(sclp_mem_list);
static u8 sclp_max_storage_id;
static DECLARE_BITMAP(sclp_storage_ids, 256);
@@ -38,6 +40,18 @@ struct memory_increment {
int standby;
};
+struct mblock {
+ struct kobject kobj;
+ unsigned int id;
+ unsigned int memmap_on_memory;
+ unsigned int config;
+};
+
+struct memory_block_arg {
+ struct mblock *mblocks;
+ struct kset *kset;
+};
+
struct assign_storage_sccb {
struct sccb_header header;
u16 rn;
@@ -185,15 +199,11 @@ static int sclp_mem_notifier(struct notifier_block *nb,
{
unsigned long start, size;
struct memory_notify *arg;
- unsigned char id;
int rc = 0;
arg = data;
start = arg->start_pfn << PAGE_SHIFT;
size = arg->nr_pages << PAGE_SHIFT;
- mutex_lock(&sclp_mem_mutex);
- for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
- sclp_attach_storage(id);
switch (action) {
case MEM_GOING_OFFLINE:
/*
@@ -204,45 +214,201 @@ static int sclp_mem_notifier(struct notifier_block *nb,
if (contains_standby_increment(start, start + size))
rc = -EPERM;
break;
- case MEM_PREPARE_ONLINE:
- /*
- * Access the altmap_start_pfn and altmap_nr_pages fields
- * within the struct memory_notify specifically when dealing
- * with only MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers.
- *
- * When altmap is in use, take the specified memory range
- * online, which includes the altmap.
- */
- if (arg->altmap_nr_pages) {
- start = PFN_PHYS(arg->altmap_start_pfn);
- size += PFN_PHYS(arg->altmap_nr_pages);
- }
- rc = sclp_mem_change_state(start, size, 1);
- if (rc || !arg->altmap_nr_pages)
- break;
- /*
- * Set CMMA state to nodat here, since the struct page memory
- * at the beginning of the memory block will not go through the
- * buddy allocator later.
- */
- __arch_set_page_nodat((void *)__va(start), arg->altmap_nr_pages);
+ default:
break;
- case MEM_FINISH_OFFLINE:
+ }
+ return rc ? NOTIFY_BAD : NOTIFY_OK;
+}
+
+static ssize_t config_mblock_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct mblock *mblock = container_of(kobj, struct mblock, kobj);
+
+ return sysfs_emit(buf, "%u\n", READ_ONCE(mblock->config));
+}
+
+static ssize_t config_mblock_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long long addr, block_size;
+ struct memory_block *mem;
+ struct mblock *mblock;
+ unsigned char id;
+ bool value;
+ int rc;
+
+ rc = kstrtobool(buf, &value);
+ if (rc)
+ return rc;
+ mblock = container_of(kobj, struct mblock, kobj);
+ block_size = memory_block_size_bytes();
+ addr = mblock->id * block_size;
+ /*
+ * Hold device_hotplug_lock when adding/removing memory blocks.
+ * Additionally, also protect calls to find_memory_block() and
+ * sclp_attach_storage().
+ */
+ rc = lock_device_hotplug_sysfs();
+ if (rc)
+ goto out;
+ for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
+ sclp_attach_storage(id);
+ if (value) {
+ if (mblock->config)
+ goto out_unlock;
+ rc = sclp_mem_change_state(addr, block_size, 1);
+ if (rc)
+ goto out_unlock;
/*
- * When altmap is in use, take the specified memory range
- * offline, which includes the altmap.
+ * Set entire memory block CMMA state to nodat. Later, when
+ * page table pages are allocated via __add_memory(), those
+ * regions are marked using __arch_set_page_dat().
*/
- if (arg->altmap_nr_pages) {
- start = PFN_PHYS(arg->altmap_start_pfn);
- size += PFN_PHYS(arg->altmap_nr_pages);
+ __arch_set_page_nodat((void *)__va(addr), block_size >> PAGE_SHIFT);
+ rc = __add_memory(0, addr, block_size,
+ mblock->memmap_on_memory ?
+ MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE : MHP_NONE);
+ if (rc)
+ goto out_unlock;
+ mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
+ put_device(&mem->dev);
+ WRITE_ONCE(mblock->config, 1);
+ } else {
+ if (!mblock->config)
+ goto out_unlock;
+ mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
+ if (mem->state != MEM_OFFLINE) {
+ put_device(&mem->dev);
+ rc = -EBUSY;
+ goto out_unlock;
}
- sclp_mem_change_state(start, size, 0);
- break;
- default:
- break;
+ /* drop the ref just obtained via find_memory_block() */
+ put_device(&mem->dev);
+ sclp_mem_change_state(addr, block_size, 0);
+ __remove_memory(addr, block_size);
+ WRITE_ONCE(mblock->config, 0);
}
- mutex_unlock(&sclp_mem_mutex);
- return rc ? NOTIFY_BAD : NOTIFY_OK;
+out_unlock:
+ unlock_device_hotplug();
+out:
+ return rc ? rc : count;
+}
+
+static ssize_t memmap_on_memory_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct mblock *mblock = container_of(kobj, struct mblock, kobj);
+
+ return sysfs_emit(buf, "%u\n", READ_ONCE(mblock->memmap_on_memory));
+}
+
+static ssize_t memmap_on_memory_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long block_size;
+ struct memory_block *mem;
+ struct mblock *mblock;
+ bool value;
+ int rc;
+
+ rc = kstrtobool(buf, &value);
+ if (rc)
+ return rc;
+ rc = lock_device_hotplug_sysfs();
+ if (rc)
+ return rc;
+ block_size = memory_block_size_bytes();
+ mblock = container_of(kobj, struct mblock, kobj);
+ mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(mblock->id * block_size)));
+ if (!mem) {
+ WRITE_ONCE(mblock->memmap_on_memory, value);
+ } else {
+ put_device(&mem->dev);
+ rc = -EBUSY;
+ }
+ unlock_device_hotplug();
+ return rc ? rc : count;
+}
+
+static void mblock_sysfs_release(struct kobject *kobj)
+{
+ struct mblock *mblock = container_of(kobj, struct mblock, kobj);
+
+ kfree(mblock);
+}
+
+static const struct kobj_type ktype = {
+ .release = mblock_sysfs_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static struct kobj_attribute memmap_attr =
+ __ATTR(memmap_on_memory, 0644, memmap_on_memory_show, memmap_on_memory_store);
+static struct kobj_attribute config_attr =
+ __ATTR(config, 0644, config_mblock_show, config_mblock_store);
+
+static struct attribute *mblock_attrs[] = {
+ &memmap_attr.attr,
+ &config_attr.attr,
+ NULL,
+};
+
+static struct attribute_group mblock_attr_group = {
+ .attrs = mblock_attrs,
+};
+
+static int create_mblock(struct mblock *mblock, struct kset *kset,
+ unsigned int id, bool config, bool memmap_on_memory)
+{
+ int rc;
+
+ mblock->memmap_on_memory = memmap_on_memory;
+ mblock->config = config;
+ mblock->id = id;
+ kobject_init(&mblock->kobj, &ktype);
+ rc = kobject_add(&mblock->kobj, &kset->kobj, "memory%d", id);
+ if (rc)
+ return rc;
+ rc = sysfs_create_group(&mblock->kobj, &mblock_attr_group);
+ if (rc)
+ kobject_put(&mblock->kobj);
+ return rc;
+}
+
+/*
+ * Create /sys/firmware/memory/memoryX for boottime configured online memory
+ * blocks
+ */
+static int create_online_mblock(struct memory_block *mem, void *argument)
+{
+ struct memory_block_arg *arg;
+ struct mblock *mblocks;
+ struct kset *kset;
+ unsigned int id;
+
+ id = mem->dev.id;
+ arg = (struct memory_block_arg *)argument;
+ mblocks = arg->mblocks;
+ kset = arg->kset;
+ return create_mblock(&mblocks[id], kset, id, true, false);
+}
+
+static int __init create_initial_online_mblocks(struct mblock *mblocks, struct kset *kset)
+{
+ struct memory_block_arg arg;
+
+ arg.mblocks = mblocks;
+ arg.kset = kset;
+ return for_each_memory_block(&arg, create_online_mblock);
+}
+
+static struct mblock * __init allocate_mblocks(void)
+{
+ u64 max_mblocks;
+ u64 block_size;
+
+ block_size = memory_block_size_bytes();
+ max_mblocks = roundup(sclp.rnmax * sclp.rzm, block_size) / block_size;
+ return kcalloc(max_mblocks, sizeof(struct mblock), GFP_KERNEL);
}
static struct notifier_block sclp_mem_nb = {
@@ -264,14 +430,17 @@ static void __init align_to_block_size(unsigned long *start,
*size = size_align;
}
-static void __init add_memory_merged(u16 rn)
+static int __init create_standby_mblocks_merged(struct mblock *mblocks,
+ struct kset *kset, u16 rn)
{
unsigned long start, size, addr, block_size;
static u16 first_rn, num;
+ unsigned int id;
+ int rc = 0;
if (rn && first_rn && (first_rn + num == rn)) {
num++;
- return;
+ return rc;
}
if (!first_rn)
goto skip_add;
@@ -286,24 +455,31 @@ static void __init add_memory_merged(u16 rn)
if (!size)
goto skip_add;
for (addr = start; addr < start + size; addr += block_size) {
- add_memory(0, addr, block_size,
- cpu_has_edat1() ?
- MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE : MHP_NONE);
+ id = addr / block_size;
+ rc = create_mblock(&mblocks[id], kset, id, false, mhp_supports_memmap_on_memory());
+ if (rc)
+ break;
}
skip_add:
first_rn = rn;
num = 1;
+ return rc;
}
-static void __init sclp_add_standby_memory(void)
+static int __init create_standby_mblocks(struct mblock *mblocks, struct kset *kset)
{
struct memory_increment *incr;
+ int rc = 0;
list_for_each_entry(incr, &sclp_mem_list, list) {
if (incr->standby)
- add_memory_merged(incr->rn);
+ rc = create_standby_mblocks_merged(mblocks, kset, incr->rn);
+ if (rc)
+ goto out;
}
- add_memory_merged(0);
+ rc = create_standby_mblocks_merged(mblocks, kset, 0);
+out:
+ return rc;
}
static void __init insert_increment(u16 rn, int standby, int assigned)
@@ -336,10 +512,12 @@ static void __init insert_increment(u16 rn, int standby, int assigned)
list_add(&new_incr->list, prev);
}
-static int __init sclp_detect_standby_memory(void)
+static int __init sclp_setup_memory(void)
{
struct read_storage_sccb *sccb;
int i, id, assigned, rc;
+ struct mblock *mblocks;
+ struct kset *kset;
/* No standby memory in kdump mode */
if (oldmem_data.start)
@@ -391,9 +569,22 @@ static int __init sclp_detect_standby_memory(void)
rc = register_memory_notifier(&sclp_mem_nb);
if (rc)
goto out;
- sclp_add_standby_memory();
+ mblocks = allocate_mblocks();
+ if (!mblocks) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ kset = kset_create_and_add("memory", NULL, firmware_kobj);
+ if (!kset) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ rc = create_initial_online_mblocks(mblocks, kset);
+ if (rc)
+ goto out;
+ rc = create_standby_mblocks(mblocks, kset);
out:
free_page((unsigned long)sccb);
return rc;
}
-__initcall(sclp_detect_standby_memory);
+__initcall(sclp_setup_memory);
--
2.48.1
[...]
> ---
> drivers/s390/char/sclp_mem.c | 291 +++++++++++++++++++++++++++++------
> 1 file changed, 241 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/s390/char/sclp_mem.c b/drivers/s390/char/sclp_mem.c
> index 27f49f5fd358..802439230294 100644
> --- a/drivers/s390/char/sclp_mem.c
> +++ b/drivers/s390/char/sclp_mem.c
> @@ -9,9 +9,12 @@
> #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
>
> #include <linux/cpufeature.h>
> +#include <linux/container_of.h>
> #include <linux/err.h>
> #include <linux/errno.h>
> #include <linux/init.h>
> +#include <linux/kobject.h>
> +#include <linux/kstrtox.h>
> #include <linux/memory.h>
> #include <linux/memory_hotplug.h>
> #include <linux/mm.h>
> @@ -27,7 +30,6 @@
> #define SCLP_CMDW_ASSIGN_STORAGE 0x000d0001
> #define SCLP_CMDW_UNASSIGN_STORAGE 0x000c0001
>
> -static DEFINE_MUTEX(sclp_mem_mutex);
> static LIST_HEAD(sclp_mem_list);
> static u8 sclp_max_storage_id;
> static DECLARE_BITMAP(sclp_storage_ids, 256);
> @@ -38,6 +40,18 @@ struct memory_increment {
> int standby;
> };
>
> +struct mblock {
> + struct kobject kobj;
> + unsigned int id;
> + unsigned int memmap_on_memory;
> + unsigned int config;
> +};
> +
> +struct memory_block_arg {
> + struct mblock *mblocks;
> + struct kset *kset;
> +};
I would avoid using "memory_block_arg" as it is reminiscent of the core mm "struct memory_block".
Similarly, I'd not call this "mblock".
What about incorporating the "sclp" side of things?
"struct sclp_mem" / "struct sclp_mem_arg"
Nicely fits "sclp_mem.c" ;)
Something like that might be better.
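For instance (just sketching the rename, field-for-field the same as
above):

struct sclp_mem {
	struct kobject kobj;
	unsigned int id;
	unsigned int memmap_on_memory;
	unsigned int config;
};

struct sclp_mem_arg {
	struct sclp_mem *sclp_mem;
	struct kset *kset;
};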
> +
> struct assign_storage_sccb {
> struct sccb_header header;
> u16 rn;
> @@ -185,15 +199,11 @@ static int sclp_mem_notifier(struct notifier_block *nb,
> {
> unsigned long start, size;
> struct memory_notify *arg;
> - unsigned char id;
> int rc = 0;
>
> arg = data;
> start = arg->start_pfn << PAGE_SHIFT;
> size = arg->nr_pages << PAGE_SHIFT;
> - mutex_lock(&sclp_mem_mutex);
> - for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
> - sclp_attach_storage(id);
> switch (action) {
> case MEM_GOING_OFFLINE:
> /*
> @@ -204,45 +214,201 @@ static int sclp_mem_notifier(struct notifier_block *nb,
> if (contains_standby_increment(start, start + size))
> rc = -EPERM;
> break;
Is there any reason this notifier is still needed? I'd assume we can just allow
for offlining + re-onlining as we please now.
In fact, I'd assume we can get rid of the notifier entirely now?
> - case MEM_PREPARE_ONLINE:
> - /*
> - * Access the altmap_start_pfn and altmap_nr_pages fields
> - * within the struct memory_notify specifically when dealing
> - * with only MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers.
> - *
> - * When altmap is in use, take the specified memory range
> - * online, which includes the altmap.
> - */
> - if (arg->altmap_nr_pages) {
> - start = PFN_PHYS(arg->altmap_start_pfn);
> - size += PFN_PHYS(arg->altmap_nr_pages);
> - }
> - rc = sclp_mem_change_state(start, size, 1);
> - if (rc || !arg->altmap_nr_pages)
> - break;
> - /*
> - * Set CMMA state to nodat here, since the struct page memory
> - * at the beginning of the memory block will not go through the
> - * buddy allocator later.
> - */
> - __arch_set_page_nodat((void *)__va(start), arg->altmap_nr_pages);
> + default:
> break;
> - case MEM_FINISH_OFFLINE:
> + }
> + return rc ? NOTIFY_BAD : NOTIFY_OK;
> +}
> +
> +static ssize_t config_mblock_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> +{
> + struct mblock *mblock = container_of(kobj, struct mblock, kobj);
> +
> + return sysfs_emit(buf, "%u\n", READ_ONCE(mblock->config));
> +}
> +
> +static ssize_t config_mblock_store(struct kobject *kobj, struct kobj_attribute *attr,
> + const char *buf, size_t count)
> +{
> + unsigned long long addr, block_size;
"unsigned long" should be sufficient I'm sure :)
> + struct memory_block *mem;
> + struct mblock *mblock;
> + unsigned char id;
> + bool value;
> + int rc;
> +
> + rc = kstrtobool(buf, &value);
> + if (rc)
> + return rc;
> + mblock = container_of(kobj, struct mblock, kobj);
> + block_size = memory_block_size_bytes();
> + addr = mblock->id * block_size;
> + /*
> + * Hold device_hotplug_lock when adding/removing memory blocks.
> + * Additionally, also protect calls to find_memory_block() and
> + * sclp_attach_storage().
> + */
> + rc = lock_device_hotplug_sysfs();
> + if (rc)
> + goto out;
> + for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
> + sclp_attach_storage(id);
> + if (value) {
> + if (mblock->config)
> + goto out_unlock;
> + rc = sclp_mem_change_state(addr, block_size, 1);
> + if (rc)
> + goto out_unlock;
> /*
> - * When altmap is in use, take the specified memory range
> - * offline, which includes the altmap.
> + * Set entire memory block CMMA state to nodat. Later, when
> + * page table pages are allocated via __add_memory(), those
> + * regions are marked using __arch_set_page_dat().
> */
> - if (arg->altmap_nr_pages) {
> - start = PFN_PHYS(arg->altmap_start_pfn);
> - size += PFN_PHYS(arg->altmap_nr_pages);
> + __arch_set_page_nodat((void *)__va(addr), block_size >> PAGE_SHIFT);
> + rc = __add_memory(0, addr, block_size,
> + mblock->memmap_on_memory ?
> + MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE : MHP_NONE);
> + if (rc)
> + goto out_unlock;
Do we have to undo the state change?
> + mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
> + put_device(&mem->dev);
> + WRITE_ONCE(mblock->config, 1);
> + } else {
> + if (!mblock->config)
> + goto out_unlock;
> + mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
> + if (mem->state != MEM_OFFLINE) {
> + put_device(&mem->dev);
> + rc = -EBUSY;
> + goto out_unlock;
> }
> - sclp_mem_change_state(start, size, 0);
> - break;
> - default:
> - break;
> + /* drop the ref just obtained via find_memory_block() */
> + put_device(&mem->dev);
> + sclp_mem_change_state(addr, block_size, 0);
> + __remove_memory(addr, block_size);
> + WRITE_ONCE(mblock->config, 0);
> }
> - mutex_unlock(&sclp_mem_mutex);
> - return rc ? NOTIFY_BAD : NOTIFY_OK;
> +out_unlock:
> + unlock_device_hotplug();
> +out:
> + return rc ? rc : count;
> +}
> +
> +static ssize_t memmap_on_memory_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> +{
> + struct mblock *mblock = container_of(kobj, struct mblock, kobj);
> +
> + return sysfs_emit(buf, "%u\n", READ_ONCE(mblock->memmap_on_memory));
> +}
> +
> +static ssize_t memmap_on_memory_store(struct kobject *kobj, struct kobj_attribute *attr,
> + const char *buf, size_t count)
> +{
> + unsigned long block_size;
> + struct memory_block *mem;
> + struct mblock *mblock;
> + bool value;
> + int rc;
> +
> + rc = kstrtobool(buf, &value);
> + if (rc)
> + return rc;
> + rc = lock_device_hotplug_sysfs();
> + if (rc)
> + return rc;
> + block_size = memory_block_size_bytes();
> + mblock = container_of(kobj, struct mblock, kobj);
> + mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(mblock->id * block_size)));
> + if (!mem) {
> + WRITE_ONCE(mblock->memmap_on_memory, value);
> + } else {
> + put_device(&mem->dev);
> + rc = -EBUSY;
> + }
> + unlock_device_hotplug();
> + return rc ? rc : count;
> +}
> +
> +static void mblock_sysfs_release(struct kobject *kobj)
> +{
> + struct mblock *mblock = container_of(kobj, struct mblock, kobj);
> +
> + kfree(mblock);
> +}
> +
> +static const struct kobj_type ktype = {
> + .release = mblock_sysfs_release,
> + .sysfs_ops = &kobj_sysfs_ops,
> +};
> +
> +static struct kobj_attribute memmap_attr =
> + __ATTR(memmap_on_memory, 0644, memmap_on_memory_show, memmap_on_memory_store);
> +static struct kobj_attribute config_attr =
> + __ATTR(config, 0644, config_mblock_show, config_mblock_store);
> +
> +static struct attribute *mblock_attrs[] = {
> + &memmap_attr.attr,
> + &config_attr.attr,
> + NULL,
> +};
> +
> +static struct attribute_group mblock_attr_group = {
> + .attrs = mblock_attrs,
> +};
> +
> +static int create_mblock(struct mblock *mblock, struct kset *kset,
> + unsigned int id, bool config, bool memmap_on_memory)
> +{
> + int rc;
> +
> + mblock->memmap_on_memory = memmap_on_memory;
> + mblock->config = config;
> + mblock->id = id;
> + kobject_init(&mblock->kobj, &ktype);
> + rc = kobject_add(&mblock->kobj, &kset->kobj, "memory%d", id);
> + if (rc)
> + return rc;
> + rc = sysfs_create_group(&mblock->kobj, &mblock_attr_group);
> + if (rc)
> + kobject_put(&mblock->kobj);
> + return rc;
> +}
> +
> +/*
> + * Create /sys/firmware/memory/memoryX for boottime configured online memory
> + * blocks
> + */
> +static int create_online_mblock(struct memory_block *mem, void *argument)
"online" is conusing. It's "initial" / "configured". Same applies to the other functions
that mention "online".
> +{
> + struct memory_block_arg *arg;
> + struct mblock *mblocks;
> + struct kset *kset;
> + unsigned int id;
> +
> + id = mem->dev.id;
> + arg = (struct memory_block_arg *)argument;
> + mblocks = arg->mblocks;
> + kset = arg->kset;
> + return create_mblock(&mblocks[id], kset, id, true, false);
> +}
> +
> +static int __init create_initial_online_mblocks(struct mblock *mblocks, struct kset *kset)
> +{
> + struct memory_block_arg arg;
> +
> + arg.mblocks = mblocks;
> + arg.kset = kset;
> + return for_each_memory_block(&arg, create_online_mblock);
> +}
> +
> +static struct mblock * __init allocate_mblocks(void)
> +{
> + u64 max_mblocks;
Nit: why a u64? The block ids are "unsigned int id;"
> + u64 block_size;
> +
> + block_size = memory_block_size_bytes();
> + max_mblocks = roundup(sclp.rnmax * sclp.rzm, block_size) / block_size;
> + return kcalloc(max_mblocks, sizeof(struct mblock), GFP_KERNEL);
I think you should structure the code a bit differently, not splitting
the function up into tiny helpers.
static int __init init_sclp_mem(void)
{
const u64 block_size = memory_block_size_bytes();
const u64 max_mblocks = roundup(sclp.rnmax * sclp.rzm, block_size) / block_size;
struct sclp_mem_arg arg;
struct kset *kset;
int rc;
/* We'll allocate memory for all blocks ahead of time. */
sclp_mem = kcalloc(max_mblocks, sizeof(struct mblock), GFP_KERNEL);
if (!sclp_mem)
return -ENOMEM;
kset = kset_create_and_add("memory", NULL, firmware_kobj);
if (!kset)
return -ENOMEM;
/* Initial memory is in the "configured" state already. */
arg.sclp_mem = sclp_mem;
arg.kset = kset;
rc = for_each_memory_block(&arg, create_configured_sclp_mem);
if (rc)
return rc;
/* Standby memory is "deconfigured". */
return create_standby_sclp_mem(sclp_mem, kset);
}
Should still be quite readable.
> }
>
> static struct notifier_block sclp_mem_nb = {
> @@ -264,14 +430,17 @@ static void __init align_to_block_size(unsigned long *start,
> *size = size_align;
> }
>
> -static void __init add_memory_merged(u16 rn)
> +static int __init create_standby_mblocks_merged(struct mblock *mblocks,
> + struct kset *kset, u16 rn)
> {
> unsigned long start, size, addr, block_size;
> static u16 first_rn, num;
> + unsigned int id;
> + int rc = 0;
>
> if (rn && first_rn && (first_rn + num == rn)) {
> num++;
> - return;
> + return rc;
> }
> if (!first_rn)
> goto skip_add;
> @@ -286,24 +455,31 @@ static void __init add_memory_merged(u16 rn)
> if (!size)
> goto skip_add;
> for (addr = start; addr < start + size; addr += block_size) {
> - add_memory(0, addr, block_size,
> - cpu_has_edat1() ?
> - MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE : MHP_NONE);
> + id = addr / block_size;
> + rc = create_mblock(&mblocks[id], kset, id, false, mhp_supports_memmap_on_memory());
> + if (rc)
> + break;
> }
> skip_add:
> first_rn = rn;
> num = 1;
> + return rc;
> }
>
> -static void __init sclp_add_standby_memory(void)
> +static int __init create_standby_mblocks(struct mblock *mblocks, struct kset *kset)
> {
> struct memory_increment *incr;
> + int rc = 0;
>
> list_for_each_entry(incr, &sclp_mem_list, list) {
> if (incr->standby)
> - add_memory_merged(incr->rn);
> + rc = create_standby_mblocks_merged(mblocks, kset, incr->rn);
> + if (rc)
> + goto out;
> }
> - add_memory_merged(0);
> + rc = create_standby_mblocks_merged(mblocks, kset, 0);
> +out:
> + return rc;
> }
>
> static void __init insert_increment(u16 rn, int standby, int assigned)
> @@ -336,10 +512,12 @@ static void __init insert_increment(u16 rn, int standby, int assigned)
> list_add(&new_incr->list, prev);
> }
>
> -static int __init sclp_detect_standby_memory(void)
> +static int __init sclp_setup_memory(void)
> {
> struct read_storage_sccb *sccb;
> int i, id, assigned, rc;
> + struct mblock *mblocks;
> + struct kset *kset;
>
> /* No standby memory in kdump mode */
> if (oldmem_data.start)
Wouldn't we still want to create the ones for initial memory at least?
[...]
--
Cheers
David / dhildenb
On Tue, Oct 07, 2025 at 10:07:43PM +0200, David Hildenbrand wrote:
> [...]
>
> > ---
> > drivers/s390/char/sclp_mem.c | 291 +++++++++++++++++++++++++++++------
> > 1 file changed, 241 insertions(+), 50 deletions(-)
> >
> > diff --git a/drivers/s390/char/sclp_mem.c b/drivers/s390/char/sclp_mem.c
> > index 27f49f5fd358..802439230294 100644
> > --- a/drivers/s390/char/sclp_mem.c
> > +++ b/drivers/s390/char/sclp_mem.c
> > @@ -9,9 +9,12 @@
> > #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
> > #include <linux/cpufeature.h>
> > +#include <linux/container_of.h>
> > #include <linux/err.h>
> > #include <linux/errno.h>
> > #include <linux/init.h>
> > +#include <linux/kobject.h>
> > +#include <linux/kstrtox.h>
> > #include <linux/memory.h>
> > #include <linux/memory_hotplug.h>
> > #include <linux/mm.h>
> > @@ -27,7 +30,6 @@
> > #define SCLP_CMDW_ASSIGN_STORAGE 0x000d0001
> > #define SCLP_CMDW_UNASSIGN_STORAGE 0x000c0001
> > -static DEFINE_MUTEX(sclp_mem_mutex);
> > static LIST_HEAD(sclp_mem_list);
> > static u8 sclp_max_storage_id;
> > static DECLARE_BITMAP(sclp_storage_ids, 256);
> > @@ -38,6 +40,18 @@ struct memory_increment {
> > int standby;
> > };
> > +struct mblock {
> > + struct kobject kobj;
> > + unsigned int id;
> > + unsigned int memmap_on_memory;
> > + unsigned int config;
> > +};
> > +
> > +struct memory_block_arg {
> > + struct mblock *mblocks;
> > + struct kset *kset;
> > +};
>
> I would avoid using "memory_block_arg" as it is reminiscent of the core mm "struct memory_block".
>
> Similarly, I'd not call this "mblock".
>
> What about incorporating the "sclp" side of things?
>
> "struct sclp_mem" / "struct sclp_mem_arg"
>
> Nicely fits "sclp_mem.c" ;)
>
> Something like that might be better.
Sure. I will change it. Thanks
> > +
> > struct assign_storage_sccb {
> > struct sccb_header header;
> > u16 rn;
> > @@ -185,15 +199,11 @@ static int sclp_mem_notifier(struct notifier_block *nb,
> > {
> > unsigned long start, size;
> > struct memory_notify *arg;
> > - unsigned char id;
> > int rc = 0;
> > arg = data;
> > start = arg->start_pfn << PAGE_SHIFT;
> > size = arg->nr_pages << PAGE_SHIFT;
> > - mutex_lock(&sclp_mem_mutex);
> > - for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
> > - sclp_attach_storage(id);
> > switch (action) {
> > case MEM_GOING_OFFLINE:
> > /*
> > @@ -204,45 +214,201 @@ static int sclp_mem_notifier(struct notifier_block *nb,
> > if (contains_standby_increment(start, start + size))
> > rc = -EPERM;
> > break;
>
> Is there any reason this notifier is still needed? I'd assume we can just allow
> for offlining + re-onlining as we please now.
>
> In fact, I'd assume we can get rid of the notifier entirely now?
I was initially uncertain about the contains_standby_increment() use case
and didn't change it here. However, after testing by removing the
contains_standby_increment() checks, I observed that the common memory
hotplug code already prevents offlining a memory block that contains
holes. This ensures safety without relying on these checks.
c5e79ef561b0 ("mm/memory_hotplug.c: don't allow to online/offline memory blocks with holes")
i.e. #cp define storage 3504M standby 2148M
This leads to a configuration where memory block 27 contains both
assigned and standby increments.
But offlining it will not succeed:
chmem -d 0x00000000d8000000-0x00000000dfffffff
chmem: Memory Block 27 (0x00000000d8000000-0x00000000dfffffff) disable
failed: Invalid argument
Hence, I will remove it. Thanks.
> > - case MEM_PREPARE_ONLINE:
> > - /*
> > - * Access the altmap_start_pfn and altmap_nr_pages fields
> > - * within the struct memory_notify specifically when dealing
> > - * with only MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers.
> > - *
> > - * When altmap is in use, take the specified memory range
> > - * online, which includes the altmap.
> > - */
> > - if (arg->altmap_nr_pages) {
> > - start = PFN_PHYS(arg->altmap_start_pfn);
> > - size += PFN_PHYS(arg->altmap_nr_pages);
> > - }
> > - rc = sclp_mem_change_state(start, size, 1);
> > - if (rc || !arg->altmap_nr_pages)
> > - break;
> > - /*
> > - * Set CMMA state to nodat here, since the struct page memory
> > - * at the beginning of the memory block will not go through the
> > - * buddy allocator later.
> > - */
> > - __arch_set_page_nodat((void *)__va(start), arg->altmap_nr_pages);
> > + default:
> > break;
> > - case MEM_FINISH_OFFLINE:
> > + }
> > + return rc ? NOTIFY_BAD : NOTIFY_OK;
> > +}
> > +
> > +static ssize_t config_mblock_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> > +{
> > + struct mblock *mblock = container_of(kobj, struct mblock, kobj);
> > +
> > + return sysfs_emit(buf, "%u\n", READ_ONCE(mblock->config));
> > +}
> > +
> > +static ssize_t config_mblock_store(struct kobject *kobj, struct kobj_attribute *attr,
> > + const char *buf, size_t count)
> > +{
> > + unsigned long long addr, block_size;
>
> "unsigned long" should be sufficient I'm sure :)
Left over. I will do so.
> > + struct memory_block *mem;
> > + struct mblock *mblock;
> > + unsigned char id;
> > + bool value;
> > + int rc;
> > +
> > + rc = kstrtobool(buf, &value);
> > + if (rc)
> > + return rc;
> > + mblock = container_of(kobj, struct mblock, kobj);
> > + block_size = memory_block_size_bytes();
> > + addr = mblock->id * block_size;
> > + /*
> > + * Hold device_hotplug_lock when adding/removing memory blocks.
> > + * Additionally, also protect calls to find_memory_block() and
> > + * sclp_attach_storage().
> > + */
> > + rc = lock_device_hotplug_sysfs();
> > + if (rc)
> > + goto out;
> > + for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
> > + sclp_attach_storage(id);
> > + if (value) {
> > + if (mblock->config)
> > + goto out_unlock;
> > + rc = sclp_mem_change_state(addr, block_size, 1);
> > + if (rc)
> > + goto out_unlock;
> > /*
> > - * When altmap is in use, take the specified memory range
> > - * offline, which includes the altmap.
> > + * Set entire memory block CMMA state to nodat. Later, when
> > + * page table pages are allocated via __add_memory(), those
> > + * regions are marked using __arch_set_page_dat().
> > */
> > - if (arg->altmap_nr_pages) {
> > - start = PFN_PHYS(arg->altmap_start_pfn);
> > - size += PFN_PHYS(arg->altmap_nr_pages);
> > + __arch_set_page_nodat((void *)__va(addr), block_size >> PAGE_SHIFT);
> > + rc = __add_memory(0, addr, block_size,
> > + mblock->memmap_on_memory ?
> > + MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE : MHP_NONE);
> > + if (rc)
> > + goto out_unlock;
>
> Do we have to undo the state change?
The intention was to keep error handling simple. In case of failure in
add_memory(), we would have the state set to 1 (not given back). But a
subsequent configuration request for that block will not have an impact.
...
> > +static int create_mblock(struct mblock *mblock, struct kset *kset,
> > + unsigned int id, bool config, bool memmap_on_memory)
> > +{
> > + int rc;
> > +
> > + mblock->memmap_on_memory = memmap_on_memory;
> > + mblock->config = config;
> > + mblock->id = id;
> > + kobject_init(&mblock->kobj, &ktype);
> > + rc = kobject_add(&mblock->kobj, &kset->kobj, "memory%d", id);
> > + if (rc)
> > + return rc;
> > + rc = sysfs_create_group(&mblock->kobj, &mblock_attr_group);
> > + if (rc)
> > + kobject_put(&mblock->kobj);
> > + return rc;
> > +}
> > +
> > +/*
> > + * Create /sys/firmware/memory/memoryX for boottime configured online memory
> > + * blocks
> > + */
> > +static int create_online_mblock(struct memory_block *mem, void *argument)
>
> "online" is conusing. It's "initial" / "configured". Same applies to the other functions
> that mention "online".
Sure. I will change it.
> > +{
> > + struct memory_block_arg *arg;
> > + struct mblock *mblocks;
> > + struct kset *kset;
> > + unsigned int id;
> > +
> > + id = mem->dev.id;
> > + arg = (struct memory_block_arg *)argument;
> > + mblocks = arg->mblocks;
> > + kset = arg->kset;
> > + return create_mblock(&mblocks[id], kset, id, true, false);
> > +}
> > +
> > +static int __init create_initial_online_mblocks(struct mblock *mblocks, struct kset *kset)
> > +{
> > + struct memory_block_arg arg;
> > +
> > + arg.mblocks = mblocks;
> > + arg.kset = kset;
> > + return for_each_memory_block(&arg, create_online_mblock);
> > +}
> > +
> > +static struct mblock * __init allocate_mblocks(void)
> > +{
> > + u64 max_mblocks;
>
> Nit: why a u64? The block ids are "unsigned int id;"
Sure. I will correct it.
> > + u64 block_size;
> > +
> > + block_size = memory_block_size_bytes();
> > + max_mblocks = roundup(sclp.rnmax * sclp.rzm, block_size) / block_size;
> > + return kcalloc(max_mblocks, sizeof(struct mblock), GFP_KERNEL);
>
>
> I think you should structure the code a bit differently, not splitting
> the function up into tiny helpers.
>
> static int __init init_sclp_mem(void)
> {
> const u64 block_size = memory_block_size_bytes();
> const u64 max_mblocks = roundup(sclp.rnmax * sclp.rzm, block_size) / block_size;
> struct sclp_mem_arg arg;
> struct kset *kset;
> int rc;
>
> /* We'll allocate memory for all blocks ahead of time. */
> sclp_mem = kcalloc(max_mblocks, sizeof(struct mblock), GFP_KERNEL);
> if (!sclp_mem)
> return -ENOMEM;
>
> kset = kset_create_and_add("memory", NULL, firmware_kobj);
> if (!kset)
> return -ENOMEM;
>
> /* Initial memory is in the "configured" state already. */
> arg.sclp_mem = sclp_mem;
> arg.kset = kset;
> rc = for_each_memory_block(&arg, create_configured_sclp_mem);
> if (rc)
> return rc;
>
> /* Standby memory is "deconfigured". */
> return create_standby_sclp_mem(sclp_mem, kset);
> }
>
> Should still be quite readable.
Then, I'll make use of it.
...
> > -static int __init sclp_detect_standby_memory(void)
> > +static int __init sclp_setup_memory(void)
> > {
> > struct read_storage_sccb *sccb;
> > int i, id, assigned, rc;
> > + struct mblock *mblocks;
> > + struct kset *kset;
> > /* No standby memory in kdump mode */
> > if (oldmem_data.start)
>
> Wouldn't we still want to create the ones for initial memory at least?
The intention was the following:
configuration and deconfiguration of memory with optional
memmap_on_memory is mostly needed only for standby memory.
If standby memory is absent or sclp is unavailable, we continue using
the previous behavior (only software offline/online), since the sclp
memory notifier was not registered in that case before either.
Thank you David
>> Is there any reason this notifier is still needed? I'd assume we can just allow
>> for offlining + re-onlining as we please now.
>>
>> In fact, I'd assume we can get rid of the notifier entirely now?
>
> I was initially uncertain about the contains_standby_increment() use case
> and didn't change it here. However, after testing by removing the
> contains_standby_increment() checks, I observed that the common memory
> hotplug code already prevents offlining a memory block that contains
> holes. This ensures safety without relying on these checks.
>
> c5e79ef561b0 ("mm/memory_hotplug.c: don't allow to online/offline memory blocks with holes")
Rings a bell :)
>
> i.e. #cp define storage 3504M standby 2148M
> This leads to a configuration where memory block 27 contains both
> assigned and standby incr.
>
> But, offlining it will not succeed:
> chmem -d 0x00000000d8000000-0x00000000dfffffff
> chmem: Memory Block 27 (0x00000000d8000000-0x00000000dfffffff) disable
> failed: Invalid argument
>
> Hence, I will remove it. Thanks.
Cool!
>
>>> - case MEM_PREPARE_ONLINE:
>>> - /*
>>> - * Access the altmap_start_pfn and altmap_nr_pages fields
>>> - * within the struct memory_notify specifically when dealing
>>> - * with only MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers.
>>> - *
>>> - * When altmap is in use, take the specified memory range
>>> - * online, which includes the altmap.
>>> - */
>>> - if (arg->altmap_nr_pages) {
>>> - start = PFN_PHYS(arg->altmap_start_pfn);
>>> - size += PFN_PHYS(arg->altmap_nr_pages);
>>> - }
>>> - rc = sclp_mem_change_state(start, size, 1);
>>> - if (rc || !arg->altmap_nr_pages)
>>> - break;
>>> - /*
>>> - * Set CMMA state to nodat here, since the struct page memory
>>> - * at the beginning of the memory block will not go through the
>>> - * buddy allocator later.
>>> - */
>>> - __arch_set_page_nodat((void *)__va(start), arg->altmap_nr_pages);
>>> + default:
>>> break;
>>> - case MEM_FINISH_OFFLINE:
>>> + }
>>> + return rc ? NOTIFY_BAD : NOTIFY_OK;
>>> +}
>>> +
>>> +static ssize_t config_mblock_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
>>> +{
>>> + struct mblock *mblock = container_of(kobj, struct mblock, kobj);
>>> +
>>> + return sysfs_emit(buf, "%u\n", READ_ONCE(mblock->config));
>>> +}
>>> +
>>> +static ssize_t config_mblock_store(struct kobject *kobj, struct kobj_attribute *attr,
>>> + const char *buf, size_t count)
>>> +{
>>> + unsigned long long addr, block_size;
>>
>> "unsigned long" should be sufficient I'm sure :)
>
> Left over. I will do so.
>
>>> + struct memory_block *mem;
>>> + struct mblock *mblock;
>>> + unsigned char id;
>>> + bool value;
>>> + int rc;
>>> +
>>> + rc = kstrtobool(buf, &value);
>>> + if (rc)
>>> + return rc;
>>> + mblock = container_of(kobj, struct mblock, kobj);
>>> + block_size = memory_block_size_bytes();
>>> + addr = mblock->id * block_size;
>>> + /*
>>> + * Hold device_hotplug_lock when adding/removing memory blocks.
>>> + * Additionally, also protect calls to find_memory_block() and
>>> + * sclp_attach_storage().
>>> + */
>>> + rc = lock_device_hotplug_sysfs();
>>> + if (rc)
>>> + goto out;
>>> + for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
>>> + sclp_attach_storage(id);
>>> + if (value) {
>>> + if (mblock->config)
>>> + goto out_unlock;
>>> + rc = sclp_mem_change_state(addr, block_size, 1);
>>> + if (rc)
>>> + goto out_unlock;
>>> /*
>>> - * When altmap is in use, take the specified memory range
>>> - * offline, which includes the altmap.
>>> + * Set entire memory block CMMA state to nodat. Later, when
>>> + * page table pages are allocated via __add_memory(), those
>>> + * regions are marked using __arch_set_page_dat().
>>> */
>>> - if (arg->altmap_nr_pages) {
>>> - start = PFN_PHYS(arg->altmap_start_pfn);
>>> - size += PFN_PHYS(arg->altmap_nr_pages);
>>> + __arch_set_page_nodat((void *)__va(addr), block_size >> PAGE_SHIFT);
>>> + rc = __add_memory(0, addr, block_size,
>>> + mblock->memmap_on_memory ?
>>> + MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE : MHP_NONE);
>>> + if (rc)
>>> + goto out_unlock;
>>
>> Do we have to undo the state change?
>
> The intention was to keep error handling simple. In case of failure in
> add_memory(), we would have the state set to 1 (not given back). But a
> subsequent configuration request for that block will not have an impact.
I mean, if we can clean up easily here by doing another
sclp_mem_change_state(), I think we should just do that.
I'd assume that sclp_mem_change_state() to 0 will usually not fail (I
might be wrong :) ).
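Something like this in the configure path (a sketch of the suggested
cleanup, untested):

	rc = __add_memory(0, addr, block_size,
			  mblock->memmap_on_memory ?
			  MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE : MHP_NONE);
	if (rc) {
		/* Undo the assign so the increments return to standby. */
		sclp_mem_change_state(addr, block_size, 0);
		goto out_unlock;
	}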
[...]
>>> -static int __init sclp_detect_standby_memory(void)
>>> +static int __init sclp_setup_memory(void)
>>> {
>>> struct read_storage_sccb *sccb;
>>> int i, id, assigned, rc;
>>> + struct mblock *mblocks;
>>> + struct kset *kset;
>>> /* No standby memory in kdump mode */
>>> if (oldmem_data.start)
>>
>> Wouldn't we still want to create the ones for initial memory at least?
>
> The intention was the following:
> configuration and deconfiguration of memory with optional
> memmap_on_memory is mostly needed only for standby memory.
>
> If standby memory is absent or sclp is unavailable, we continue using
> the previous behavior (only software offline/online), since the sclp
> memory notifier was not registered in that case before either.
I mean, probably nobody in the kdump kernel cares about it either way,
agreed.
--
Cheers
David / dhildenb