SPAPR is the last user of numa_get_node() and a bunch of
supporting code to maintain numa_info[x].addr list.
Get the LMB node id from the pc-dimm list instead, which allows
removing ~80 LOC that maintain the dynamic address range
lookup list.
It also removes the pc-dimm dependency on numa_[un]set_mem_node_id(),
makes pc-dimms the sole source of information about which
node they belong to, and removes duplicate data from the global
numa_info.
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
Besides making the code simpler, my interest in this simplification
lies in allowing parse_numa_opts() to be called multiple times
without complex cleanups in case the NUMA config has changed
since startup.
PS:
build tested only
---
include/sysemu/numa.h | 10 ------
hw/mem/pc-dimm.c | 2 --
hw/ppc/spapr.c | 29 +++++++++++++++-
numa.c | 94 ---------------------------------------------------
4 files changed, 28 insertions(+), 107 deletions(-)
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 5c6df28..b354521 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -10,17 +10,10 @@
extern int nb_numa_nodes; /* Number of NUMA nodes */
extern bool have_numa_distance;
-struct numa_addr_range {
- ram_addr_t mem_start;
- ram_addr_t mem_end;
- QLIST_ENTRY(numa_addr_range) entry;
-};
-
struct node_info {
uint64_t node_mem;
struct HostMemoryBackend *node_memdev;
bool present;
- QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */
uint8_t distance[MAX_NODES];
};
@@ -33,9 +26,6 @@ extern NodeInfo numa_info[MAX_NODES];
void parse_numa_opts(MachineState *ms);
void query_numa_node_mem(NumaNodeMem node_mem[]);
extern QemuOptsList qemu_numa_opts;
-void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
-void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
-uint32_t numa_get_node(ram_addr_t addr, Error **errp);
void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
int nb_nodes, ram_addr_t size);
void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 66eace5..6e74b61 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -109,7 +109,6 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr);
vmstate_register_ram(vmstate_mr, dev);
- numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node);
out:
error_propagate(errp, local_err);
@@ -122,7 +121,6 @@ void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm);
- numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node);
memory_region_del_subregion(&hpms->mr, mr);
vmstate_unregister_ram(vmstate_mr, dev);
}
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 9efddea..8de0b5b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -641,6 +641,26 @@ static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
}
+/*
+ * Look up the NUMA node of the pc-dimm that covers a given address.
+ *
+ * @list: memory device list to search; a NULL list is treated as empty
+ * @addr: address to look up
+ *
+ * Returns the node of the first DIMM entry in @list whose range
+ * [addr, addr + size) contains @addr, or (uint32_t)-1 if no DIMM entry
+ * covers it.
+ */
+static uint32_t spapr_pc_dimm_node(MemoryDeviceInfoList *list, ram_addr_t addr)
+{
+    MemoryDeviceInfoList *info;
+
+    for (info = list; info; info = info->next) {
+        MemoryDeviceInfo *value = info->value;
+
+        /* Skip entries that are not DIMMs; only u.dimm is read below. */
+        if (value && value->type == MEMORY_DEVICE_INFO_KIND_DIMM) {
+            PCDIMMDeviceInfo *pcdimm_info = value->u.dimm.data;
+
+            /*
+             * Half-open range check: DIMM start <= addr < DIMM end.
+             * The lower bound must compare addr against the DIMM base,
+             * otherwise only the first LMB of a DIMM would ever match.
+             */
+            if (addr >= pcdimm_info->addr &&
+                addr < (pcdimm_info->addr + pcdimm_info->size)) {
+                return pcdimm_info->node;
+            }
+        }
+    }
+
+    return -1;
+}
+
/*
* Adds ibm,dynamic-reconfiguration-memory node.
* Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation
@@ -658,6 +678,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
lmb_size;
uint32_t *int_buf, *cur_index, buf_len;
int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
+ MemoryDeviceInfoList *dimms = NULL;
/*
* Don't create the node if there is no hotpluggable memory
@@ -692,6 +713,11 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
goto out;
}
+ if (hotplug_lmb_start) {
+ MemoryDeviceInfoList **prev = &dimms;
+ qmp_pc_dimm_device_list(qdev_get_machine(), &prev);
+ }
+
/* ibm,dynamic-memory */
int_buf[0] = cpu_to_be32(nr_lmbs);
cur_index++;
@@ -709,7 +735,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff);
dynamic_memory[2] = cpu_to_be32(spapr_drc_index(drc));
dynamic_memory[3] = cpu_to_be32(0); /* reserved */
- dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL));
+ dynamic_memory[4] = cpu_to_be32(spapr_pc_dimm_node(dimms, addr));
if (memory_region_present(get_system_memory(), addr)) {
dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
} else {
@@ -732,6 +758,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE;
}
+ qapi_free_MemoryDeviceInfoList(dimms);
ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len);
if (ret < 0) {
goto out;
diff --git a/numa.c b/numa.c
index 7151b24..98fa9a4 100644
--- a/numa.c
+++ b/numa.c
@@ -55,92 +55,6 @@ int nb_numa_nodes;
bool have_numa_distance;
NodeInfo numa_info[MAX_NODES];
-void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
-{
- struct numa_addr_range *range;
-
- /*
- * Memory-less nodes can come here with 0 size in which case,
- * there is nothing to do.
- */
- if (!size) {
- return;
- }
-
- range = g_malloc0(sizeof(*range));
- range->mem_start = addr;
- range->mem_end = addr + size - 1;
- QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry);
-}
-
-void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
-{
- struct numa_addr_range *range, *next;
-
- QLIST_FOREACH_SAFE(range, &numa_info[node].addr, entry, next) {
- if (addr == range->mem_start && (addr + size - 1) == range->mem_end) {
- QLIST_REMOVE(range, entry);
- g_free(range);
- return;
- }
- }
-}
-
-static void numa_set_mem_ranges(void)
-{
- int i;
- ram_addr_t mem_start = 0;
-
- /*
- * Deduce start address of each node and use it to store
- * the address range info in numa_info address range list
- */
- for (i = 0; i < nb_numa_nodes; i++) {
- numa_set_mem_node_id(mem_start, numa_info[i].node_mem, i);
- mem_start += numa_info[i].node_mem;
- }
-}
-
-/*
- * Check if @addr falls under NUMA @node.
- */
-static bool numa_addr_belongs_to_node(ram_addr_t addr, uint32_t node)
-{
- struct numa_addr_range *range;
-
- QLIST_FOREACH(range, &numa_info[node].addr, entry) {
- if (addr >= range->mem_start && addr <= range->mem_end) {
- return true;
- }
- }
- return false;
-}
-
-/*
- * Given an address, return the index of the NUMA node to which the
- * address belongs to.
- */
-uint32_t numa_get_node(ram_addr_t addr, Error **errp)
-{
- uint32_t i;
-
- /* For non NUMA configurations, check if the addr falls under node 0 */
- if (!nb_numa_nodes) {
- if (numa_addr_belongs_to_node(addr, 0)) {
- return 0;
- }
- }
-
- for (i = 0; i < nb_numa_nodes; i++) {
- if (numa_addr_belongs_to_node(addr, i)) {
- return i;
- }
- }
-
- error_setg(errp, "Address 0x" RAM_ADDR_FMT " doesn't belong to any "
- "NUMA node", addr);
- return -1;
-}
static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
Error **errp)
@@ -497,12 +411,6 @@ void parse_numa_opts(MachineState *ms)
exit(1);
}
- for (i = 0; i < nb_numa_nodes; i++) {
- QLIST_INIT(&numa_info[i].addr);
- }
-
- numa_set_mem_ranges();
-
/* QEMU needs at least all unique node pair distances to build
* the whole NUMA distance table. QEMU treats the distance table
* as symmetric by default, i.e. distance A->B == distance B->A.
@@ -522,8 +430,6 @@ void parse_numa_opts(MachineState *ms)
/* Validation succeeded, now fill in any missing distances. */
complete_init_numa_distance();
}
- } else {
- numa_set_mem_node_id(0, ram_size, 0);
}
}
--
2.7.4
On Tue, Dec 05, 2017 at 04:41:17PM +0100, Igor Mammedov wrote: > SPAPR is the last user of numa_get_node() and a bunch of > supporting code to maintain numa_info[x].addr list. > > Get LMB node id from pc-dimm list, which allows to > remove ~80LOC maintaining dynamic address range > lookup list. > > It also removes pc-dimm dependency on numa_[un]set_mem_node_id() > and makes pc-dimms a sole source of information about which > node it belongs to and removes duplicate data from global > numa_info. > > Signed-off-by: Igor Mammedov <imammedo@redhat.com> > --- > Beside making code simpler, my interest in simplification > lies in allowing calling parse_numa_opts() multiple times, > without complex cleanups in case NUMA config is changed > since startup. > > PS: > build tested only > --- > include/sysemu/numa.h | 10 ------ > hw/mem/pc-dimm.c | 2 -- > hw/ppc/spapr.c | 29 +++++++++++++++- > numa.c | 94 --------------------------------------------------- > 4 files changed, 28 insertions(+), 107 deletions(-) Applied to ppc-for-2.12. It definitely seems like an improvement over what we have. Looking back at the DIMM list from QMP in the loop seems a little roundabout though. Maybe we'd be better stepping through the DIMMs, then stepping through the LMBs within each DIMM, rather than just stepping through the LMBs directly. 
> > diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h > index 5c6df28..b354521 100644 > --- a/include/sysemu/numa.h > +++ b/include/sysemu/numa.h > @@ -10,17 +10,10 @@ > extern int nb_numa_nodes; /* Number of NUMA nodes */ > extern bool have_numa_distance; > > -struct numa_addr_range { > - ram_addr_t mem_start; > - ram_addr_t mem_end; > - QLIST_ENTRY(numa_addr_range) entry; > -}; > - > struct node_info { > uint64_t node_mem; > struct HostMemoryBackend *node_memdev; > bool present; > - QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */ > uint8_t distance[MAX_NODES]; > }; > > @@ -33,9 +26,6 @@ extern NodeInfo numa_info[MAX_NODES]; > void parse_numa_opts(MachineState *ms); > void query_numa_node_mem(NumaNodeMem node_mem[]); > extern QemuOptsList qemu_numa_opts; > -void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); > -void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); > -uint32_t numa_get_node(ram_addr_t addr, Error **errp); > void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, > int nb_nodes, ram_addr_t size); > void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, > diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c > index 66eace5..6e74b61 100644 > --- a/hw/mem/pc-dimm.c > +++ b/hw/mem/pc-dimm.c > @@ -109,7 +109,6 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, > > memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr); > vmstate_register_ram(vmstate_mr, dev); > - numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node); > > out: > error_propagate(errp, local_err); > @@ -122,7 +121,6 @@ void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms, > PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); > MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm); > > - numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node); > memory_region_del_subregion(&hpms->mr, mr); > 
vmstate_unregister_ram(vmstate_mr, dev); > } > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 9efddea..8de0b5b 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -641,6 +641,26 @@ static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr) > > } > > +static uint32_t spapr_pc_dimm_node(MemoryDeviceInfoList *list, ram_addr_t addr) > +{ > + MemoryDeviceInfoList *info; > + > + for (info = list; info; info = info->next) { > + MemoryDeviceInfo *value = info->value; > + > + if (value && value->type == MEMORY_DEVICE_INFO_KIND_DIMM) { > + PCDIMMDeviceInfo *pcdimm_info = value->u.dimm.data; > + > + if (pcdimm_info->addr >= addr && > + addr < (pcdimm_info->addr + pcdimm_info->size)) { > + return pcdimm_info->node; > + } > + } > + } > + > + return -1; > +} > + > /* > * Adds ibm,dynamic-reconfiguration-memory node. > * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation > @@ -658,6 +678,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) > lmb_size; > uint32_t *int_buf, *cur_index, buf_len; > int nr_nodes = nb_numa_nodes ? 
nb_numa_nodes : 1; > + MemoryDeviceInfoList *dimms = NULL; > > /* > * Don't create the node if there is no hotpluggable memory > @@ -692,6 +713,11 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) > goto out; > } > > + if (hotplug_lmb_start) { > + MemoryDeviceInfoList **prev = &dimms; > + qmp_pc_dimm_device_list(qdev_get_machine(), &prev); > + } > + > /* ibm,dynamic-memory */ > int_buf[0] = cpu_to_be32(nr_lmbs); > cur_index++; > @@ -709,7 +735,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) > dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); > dynamic_memory[2] = cpu_to_be32(spapr_drc_index(drc)); > dynamic_memory[3] = cpu_to_be32(0); /* reserved */ > - dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL)); > + dynamic_memory[4] = cpu_to_be32(spapr_pc_dimm_node(dimms, addr)); > if (memory_region_present(get_system_memory(), addr)) { > dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); > } else { > @@ -732,6 +758,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) > > cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE; > } > + qapi_free_MemoryDeviceInfoList(dimms); > ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len); > if (ret < 0) { > goto out; > diff --git a/numa.c b/numa.c > index 7151b24..98fa9a4 100644 > --- a/numa.c > +++ b/numa.c > @@ -55,92 +55,6 @@ int nb_numa_nodes; > bool have_numa_distance; > NodeInfo numa_info[MAX_NODES]; > > -void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) > -{ > - struct numa_addr_range *range; > - > - /* > - * Memory-less nodes can come here with 0 size in which case, > - * there is nothing to do. 
> - */ > - if (!size) { > - return; > - } > - > - range = g_malloc0(sizeof(*range)); > - range->mem_start = addr; > - range->mem_end = addr + size - 1; > - QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry); > -} > - > -void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) > -{ > - struct numa_addr_range *range, *next; > - > - QLIST_FOREACH_SAFE(range, &numa_info[node].addr, entry, next) { > - if (addr == range->mem_start && (addr + size - 1) == range->mem_end) { > - QLIST_REMOVE(range, entry); > - g_free(range); > - return; > - } > - } > -} > - > -static void numa_set_mem_ranges(void) > -{ > - int i; > - ram_addr_t mem_start = 0; > - > - /* > - * Deduce start address of each node and use it to store > - * the address range info in numa_info address range list > - */ > - for (i = 0; i < nb_numa_nodes; i++) { > - numa_set_mem_node_id(mem_start, numa_info[i].node_mem, i); > - mem_start += numa_info[i].node_mem; > - } > -} > - > -/* > - * Check if @addr falls under NUMA @node. > - */ > -static bool numa_addr_belongs_to_node(ram_addr_t addr, uint32_t node) > -{ > - struct numa_addr_range *range; > - > - QLIST_FOREACH(range, &numa_info[node].addr, entry) { > - if (addr >= range->mem_start && addr <= range->mem_end) { > - return true; > - } > - } > - return false; > -} > - > -/* > - * Given an address, return the index of the NUMA node to which the > - * address belongs to. 
> - */ > -uint32_t numa_get_node(ram_addr_t addr, Error **errp) > -{ > - uint32_t i; > - > - /* For non NUMA configurations, check if the addr falls under node 0 */ > - if (!nb_numa_nodes) { > - if (numa_addr_belongs_to_node(addr, 0)) { > - return 0; > - } > - } > - > - for (i = 0; i < nb_numa_nodes; i++) { > - if (numa_addr_belongs_to_node(addr, i)) { > - return i; > - } > - } > - > - error_setg(errp, "Address 0x" RAM_ADDR_FMT " doesn't belong to any " > - "NUMA node", addr); > - return -1; > -} > > static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, > Error **errp) > @@ -497,12 +411,6 @@ void parse_numa_opts(MachineState *ms) > exit(1); > } > > - for (i = 0; i < nb_numa_nodes; i++) { > - QLIST_INIT(&numa_info[i].addr); > - } > - > - numa_set_mem_ranges(); > - > /* QEMU needs at least all unique node pair distances to build > * the whole NUMA distance table. QEMU treats the distance table > * as symmetric by default, i.e. distance A->B == distance B->A. > @@ -522,8 +430,6 @@ void parse_numa_opts(MachineState *ms) > /* Validation succeeded, now fill in any missing distances. */ > complete_init_numa_distance(); > } > - } else { > - numa_set_mem_node_id(0, ram_size, 0); > } > } > -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
On Wed, 6 Dec 2017 11:14:06 +1100 David Gibson <david@gibson.dropbear.id.au> wrote: > On Tue, Dec 05, 2017 at 04:41:17PM +0100, Igor Mammedov wrote: > > SPAPR is the last user of numa_get_node() and a bunch of > > supporting code to maintain numa_info[x].addr list. > > > > Get LMB node id from pc-dimm list, which allows to > > remove ~80LOC maintaining dynamic address range > > lookup list. > > > > It also removes pc-dimm dependency on numa_[un]set_mem_node_id() > > and makes pc-dimms a sole source of information about which > > node it belongs to and removes duplicate data from global > > numa_info. > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com> > > --- > > Beside making code simpler, my interest in simplification > > lies in allowing calling parse_numa_opts() multiple times, > > without complex cleanups in case NUMA config is changed > > since startup. > > > > PS: > > build tested only > > --- > > include/sysemu/numa.h | 10 ------ > > hw/mem/pc-dimm.c | 2 -- > > hw/ppc/spapr.c | 29 +++++++++++++++- > > numa.c | 94 --------------------------------------------------- > > 4 files changed, 28 insertions(+), 107 deletions(-) > > Applied to ppc-for-2.12. Thanks > > It definitely seems like an improvement over what we have. Looking > back at the DIMM list from QMP in the loop seems a little roundabout > though. Maybe we'd be better stepping through the DIMMs, then > stepping through the LMBs within each DIMM, rather than just stepping > through the LMBs directly. Surely that would be better, maybe someone from ppc side would take care of it. 
> > > > > > diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h > > index 5c6df28..b354521 100644 > > --- a/include/sysemu/numa.h > > +++ b/include/sysemu/numa.h > > @@ -10,17 +10,10 @@ > > extern int nb_numa_nodes; /* Number of NUMA nodes */ > > extern bool have_numa_distance; > > > > -struct numa_addr_range { > > - ram_addr_t mem_start; > > - ram_addr_t mem_end; > > - QLIST_ENTRY(numa_addr_range) entry; > > -}; > > - > > struct node_info { > > uint64_t node_mem; > > struct HostMemoryBackend *node_memdev; > > bool present; > > - QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */ > > uint8_t distance[MAX_NODES]; > > }; > > > > @@ -33,9 +26,6 @@ extern NodeInfo numa_info[MAX_NODES]; > > void parse_numa_opts(MachineState *ms); > > void query_numa_node_mem(NumaNodeMem node_mem[]); > > extern QemuOptsList qemu_numa_opts; > > -void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); > > -void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); > > -uint32_t numa_get_node(ram_addr_t addr, Error **errp); > > void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, > > int nb_nodes, ram_addr_t size); > > void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, > > diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c > > index 66eace5..6e74b61 100644 > > --- a/hw/mem/pc-dimm.c > > +++ b/hw/mem/pc-dimm.c > > @@ -109,7 +109,6 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, > > > > memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr); > > vmstate_register_ram(vmstate_mr, dev); > > - numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node); > > > > out: > > error_propagate(errp, local_err); > > @@ -122,7 +121,6 @@ void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms, > > PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); > > MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm); > > > > - numa_unset_mem_node_id(dimm->addr, 
memory_region_size(mr), dimm->node); > > memory_region_del_subregion(&hpms->mr, mr); > > vmstate_unregister_ram(vmstate_mr, dev); > > } > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > index 9efddea..8de0b5b 100644 > > --- a/hw/ppc/spapr.c > > +++ b/hw/ppc/spapr.c > > @@ -641,6 +641,26 @@ static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr) > > > > } > > > > +static uint32_t spapr_pc_dimm_node(MemoryDeviceInfoList *list, ram_addr_t addr) > > +{ > > + MemoryDeviceInfoList *info; > > + > > + for (info = list; info; info = info->next) { > > + MemoryDeviceInfo *value = info->value; > > + > > + if (value && value->type == MEMORY_DEVICE_INFO_KIND_DIMM) { > > + PCDIMMDeviceInfo *pcdimm_info = value->u.dimm.data; > > + > > + if (pcdimm_info->addr >= addr && > > + addr < (pcdimm_info->addr + pcdimm_info->size)) { > > + return pcdimm_info->node; > > + } > > + } > > + } > > + > > + return -1; > > +} > > + > > /* > > * Adds ibm,dynamic-reconfiguration-memory node. > > * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation > > @@ -658,6 +678,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) > > lmb_size; > > uint32_t *int_buf, *cur_index, buf_len; > > int nr_nodes = nb_numa_nodes ? 
nb_numa_nodes : 1; > > + MemoryDeviceInfoList *dimms = NULL; > > > > /* > > * Don't create the node if there is no hotpluggable memory > > @@ -692,6 +713,11 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) > > goto out; > > } > > > > + if (hotplug_lmb_start) { > > + MemoryDeviceInfoList **prev = &dimms; > > + qmp_pc_dimm_device_list(qdev_get_machine(), &prev); > > + } > > + > > /* ibm,dynamic-memory */ > > int_buf[0] = cpu_to_be32(nr_lmbs); > > cur_index++; > > @@ -709,7 +735,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) > > dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); > > dynamic_memory[2] = cpu_to_be32(spapr_drc_index(drc)); > > dynamic_memory[3] = cpu_to_be32(0); /* reserved */ > > - dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL)); > > + dynamic_memory[4] = cpu_to_be32(spapr_pc_dimm_node(dimms, addr)); > > if (memory_region_present(get_system_memory(), addr)) { > > dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); > > } else { > > @@ -732,6 +758,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) > > > > cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE; > > } > > + qapi_free_MemoryDeviceInfoList(dimms); > > ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len); > > if (ret < 0) { > > goto out; > > diff --git a/numa.c b/numa.c > > index 7151b24..98fa9a4 100644 > > --- a/numa.c > > +++ b/numa.c > > @@ -55,92 +55,6 @@ int nb_numa_nodes; > > bool have_numa_distance; > > NodeInfo numa_info[MAX_NODES]; > > > > -void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) > > -{ > > - struct numa_addr_range *range; > > - > > - /* > > - * Memory-less nodes can come here with 0 size in which case, > > - * there is nothing to do. 
> > - */ > > - if (!size) { > > - return; > > - } > > - > > - range = g_malloc0(sizeof(*range)); > > - range->mem_start = addr; > > - range->mem_end = addr + size - 1; > > - QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry); > > -} > > - > > -void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) > > -{ > > - struct numa_addr_range *range, *next; > > - > > - QLIST_FOREACH_SAFE(range, &numa_info[node].addr, entry, next) { > > - if (addr == range->mem_start && (addr + size - 1) == range->mem_end) { > > - QLIST_REMOVE(range, entry); > > - g_free(range); > > - return; > > - } > > - } > > -} > > - > > -static void numa_set_mem_ranges(void) > > -{ > > - int i; > > - ram_addr_t mem_start = 0; > > - > > - /* > > - * Deduce start address of each node and use it to store > > - * the address range info in numa_info address range list > > - */ > > - for (i = 0; i < nb_numa_nodes; i++) { > > - numa_set_mem_node_id(mem_start, numa_info[i].node_mem, i); > > - mem_start += numa_info[i].node_mem; > > - } > > -} > > - > > -/* > > - * Check if @addr falls under NUMA @node. > > - */ > > -static bool numa_addr_belongs_to_node(ram_addr_t addr, uint32_t node) > > -{ > > - struct numa_addr_range *range; > > - > > - QLIST_FOREACH(range, &numa_info[node].addr, entry) { > > - if (addr >= range->mem_start && addr <= range->mem_end) { > > - return true; > > - } > > - } > > - return false; > > -} > > - > > -/* > > - * Given an address, return the index of the NUMA node to which the > > - * address belongs to. 
> > - */ > > -uint32_t numa_get_node(ram_addr_t addr, Error **errp) > > -{ > > - uint32_t i; > > - > > - /* For non NUMA configurations, check if the addr falls under node 0 */ > > - if (!nb_numa_nodes) { > > - if (numa_addr_belongs_to_node(addr, 0)) { > > - return 0; > > - } > > - } > > - > > - for (i = 0; i < nb_numa_nodes; i++) { > > - if (numa_addr_belongs_to_node(addr, i)) { > > - return i; > > - } > > - } > > - > > - error_setg(errp, "Address 0x" RAM_ADDR_FMT " doesn't belong to any " > > - "NUMA node", addr); > > - return -1; > > -} > > > > static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, > > Error **errp) > > @@ -497,12 +411,6 @@ void parse_numa_opts(MachineState *ms) > > exit(1); > > } > > > > - for (i = 0; i < nb_numa_nodes; i++) { > > - QLIST_INIT(&numa_info[i].addr); > > - } > > - > > - numa_set_mem_ranges(); > > - > > /* QEMU needs at least all unique node pair distances to build > > * the whole NUMA distance table. QEMU treats the distance table > > * as symmetric by default, i.e. distance A->B == distance B->A. > > @@ -522,8 +430,6 @@ void parse_numa_opts(MachineState *ms) > > /* Validation succeeded, now fill in any missing distances. */ > > complete_init_numa_distance(); > > } > > - } else { > > - numa_set_mem_node_id(0, ram_size, 0); > > } > > } > > >
On Wed, Dec 06, 2017 at 10:57:32AM +0100, Igor Mammedov wrote: > On Wed, 6 Dec 2017 11:14:06 +1100 > David Gibson <david@gibson.dropbear.id.au> wrote: > > > On Tue, Dec 05, 2017 at 04:41:17PM +0100, Igor Mammedov wrote: > > > SPAPR is the last user of numa_get_node() and a bunch of > > > supporting code to maintain numa_info[x].addr list. > > > > > > Get LMB node id from pc-dimm list, which allows to > > > remove ~80LOC maintaining dynamic address range > > > lookup list. > > > > > > It also removes pc-dimm dependency on numa_[un]set_mem_node_id() > > > and makes pc-dimms a sole source of information about which > > > node it belongs to and removes duplicate data from global > > > numa_info. > > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com> > > > --- > > > Beside making code simpler, my interest in simplification > > > lies in allowing calling parse_numa_opts() multiple times, > > > without complex cleanups in case NUMA config is changed > > > since startup. > > > > > > PS: > > > build tested only > > > --- > > > include/sysemu/numa.h | 10 ------ > > > hw/mem/pc-dimm.c | 2 -- > > > hw/ppc/spapr.c | 29 +++++++++++++++- > > > numa.c | 94 --------------------------------------------------- > > > 4 files changed, 28 insertions(+), 107 deletions(-) > > > > Applied to ppc-for-2.12. > Thanks > > > > > It definitely seems like an improvement over what we have. Looking > > back at the DIMM list from QMP in the loop seems a little roundabout > > though. Maybe we'd be better stepping through the DIMMs, then > > stepping through the LMBs within each DIMM, rather than just stepping > > through the LMBs directly. > Surely that would be better, maybe someone from ppc side would take care > of it. Well, it's now on my vast list of things to look at if I ever have time.. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
© 2016 - 2024 Red Hat, Inc.