From: Bing Niu <bing.niu@intel.com>
Add memory bandwidth allocation support to virresctrl class.
Introducing virResctrlAllocMemBW which is used for allocating memory
bandwidth. Following virResctrlAllocPerType, it also employs a
nested sparse array to indicate whether allocation is available for
particular last level cache.
Signed-off-by: Bing Niu <bing.niu@intel.com>
---
src/util/virresctrl.c | 346 ++++++++++++++++++++++++++++++++++++++++++++++++--
src/util/virresctrl.h | 13 ++
2 files changed, 346 insertions(+), 13 deletions(-)
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 06e2702..bec2afd 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -36,9 +36,9 @@ VIR_LOG_INIT("util.virresctrl")
/* Resctrl is short for Resource Control. It might be implemented for various
- * resources, but at the time of this writing this is only supported for cache
- * allocation technology (aka CAT). Hence the reson for leaving 'Cache' out of
- * all the structure and function names for now (can be added later if needed.
+ * resources. Currently this supports cache allocation technology (aka CAT) and
+ * memory bandwidth allocation (aka MBA). More resources technologies may be
+ * added in feature.
*/
@@ -89,6 +89,8 @@ typedef virResctrlAllocPerType *virResctrlAllocPerTypePtr;
typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr;
+typedef struct _virResctrlAllocMemBW virResctrlAllocMemBW;
+typedef virResctrlAllocMemBW *virResctrlAllocMemBWPtr;
/* Class definitions and initializations */
static virClassPtr virResctrlInfoClass;
@@ -181,7 +183,10 @@ virResctrlInfoDispose(void *obj)
* consequently a directory under /sys/fs/resctrl). Since it can have multiple
* parts of multiple caches allocated it is represented as bunch of nested
* sparse arrays (by sparse I mean array of pointers so that each might be NULL
- * in case there is no allocation for that particular one (level, cache, ...)).
+ * in case there is no allocation for that particular cache allocation (level,
+ * cache, ...) or memory allocation for particular node).
+ *
+ * =====Cache allocation technology (CAT)=====
*
* Since one allocation can be made for caches on different levels, the first
* nested sparse array is of types virResctrlAllocPerLevel. For example if you
@@ -206,6 +211,16 @@ virResctrlInfoDispose(void *obj)
* all of them. While doing that we store the bitmask in a sparse array of
* virBitmaps named `masks` indexed the same way as `sizes`. The upper bounds
* of the sparse arrays are stored in nmasks or nsizes, respectively.
+ *
+ * =====Memory Bandwidth allocation technology (MBA)=====
+ *
+ * The memory bandwidth allocation support in virResctrlAlloc works in the same
+ * fashion as CAT. However, memory bandwidth controller doesn't have a hierarchy
+ * organization as cache, each node have one memory bandwidth controller to
+ * memory bandwidth distribution. The number of memory bandwidth controller is
+ * identical with number of last level cache. So MBA also employs a sparse array
+ * to represent whether a memory bandwidth allocation happens on corresponding node.
+ * The available memory controller number is collected in 'virResctrlInfo'.
*/
struct _virResctrlAllocPerType {
/* There could be bool saying whether this is set or not, but since everything
@@ -226,12 +241,24 @@ struct _virResctrlAllocPerLevel {
* VIR_CACHE_TYPE_LAST number of items */
};
+/*
+ * virResctrlAllocMemBW represents one memory bandwidth allocation. Since it can have
+ * several last level caches in a NUMA system, it is also represented as a nested
+ * sparse arrays as virRestrlAllocPerLevel.
+ */
+struct _virResctrlAllocMemBW {
+ unsigned int **bandwidths;
+ size_t nbandwidths;
+};
+
struct _virResctrlAlloc {
virObject parent;
virResctrlAllocPerLevelPtr *levels;
size_t nlevels;
+ virResctrlAllocMemBWPtr mem_bw;
+
/* The identifier (any unique string for now) */
char *id;
/* libvirt-generated path in /sys/fs/resctrl for this particular
@@ -275,6 +302,13 @@ virResctrlAllocDispose(void *obj)
VIR_FREE(level);
}
+ if (alloc->mem_bw) {
+ virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
+ for (i = 0; i < mem_bw->nbandwidths; i++)
+ VIR_FREE(mem_bw->bandwidths[i]);
+ }
+
+ VIR_FREE(alloc->mem_bw);
VIR_FREE(alloc->id);
VIR_FREE(alloc->path);
VIR_FREE(alloc->levels);
@@ -697,6 +731,9 @@ virResctrlAllocIsEmpty(virResctrlAllocPtr alloc)
if (!alloc)
return true;
+ if (alloc->mem_bw)
+ return false;
+
for (i = 0; i < alloc->nlevels; i++) {
virResctrlAllocPerLevelPtr a_level = alloc->levels[i];
@@ -890,6 +927,27 @@ virResctrlAllocSetCacheSize(virResctrlAllocPtr alloc,
int
+virResctrlAllocForeachMemory(virResctrlAllocPtr alloc,
+ virResctrlAllocForeachMemoryCallback cb,
+ void *opaque)
+{
+ size_t i = 0;
+
+ if (!alloc)
+ return 0;
+
+ if (alloc->mem_bw) {
+ virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
+ for (i = 0; i < mem_bw->nbandwidths; i++)
+ if (mem_bw->bandwidths[i])
+ cb(i, *mem_bw->bandwidths[i], opaque);
+ }
+
+ return 0;
+}
+
+
+int
virResctrlAllocForeachCache(virResctrlAllocPtr alloc,
virResctrlAllocForeachCacheCallback cb,
void *opaque)
@@ -952,6 +1010,240 @@ virResctrlAllocGetID(virResctrlAllocPtr alloc)
}
+static void
+virResctrlMemoryBandwidthSubtract(virResctrlAllocPtr free,
+ virResctrlAllocPtr used)
+{
+ size_t i;
+
+ if (!used->mem_bw)
+ return;
+
+ for (i = 0; i < used->mem_bw->nbandwidths; i++) {
+ if (used->mem_bw->bandwidths[i])
+ *(free->mem_bw->bandwidths[i]) -= *(used->mem_bw->bandwidths[i]);
+ }
+}
+
+
+int
+virResctrlSetMemoryBandwidth(virResctrlAllocPtr alloc,
+ unsigned int id,
+ unsigned int memory_bandwidth)
+{
+ virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
+
+ if (!mem_bw) {
+ if (VIR_ALLOC(mem_bw) < 0)
+ return -1;
+ alloc->mem_bw = mem_bw;
+ }
+
+ if (mem_bw->nbandwidths <= id &&
+ VIR_EXPAND_N(mem_bw->bandwidths, mem_bw->nbandwidths,
+ id - mem_bw->nbandwidths + 1) < 0)
+ return -1;
+
+ if (mem_bw->bandwidths[id]) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Memory Bandwidth already defined for node %u"),
+ id);
+ return -1;
+ }
+
+ if (VIR_ALLOC(mem_bw->bandwidths[id]) < 0)
+ return -1;
+
+ *(mem_bw->bandwidths[id]) = memory_bandwidth;
+ return 0;
+}
+
+
+static int
+virResctrlAllocMemoryBandwidthFormat(virResctrlAllocPtr alloc,
+ virBufferPtr buf)
+{
+ size_t i;
+
+ if (!alloc->mem_bw)
+ return 0;
+
+ virBufferAddLit(buf, "MB:");
+
+ for (i = 0; i < alloc->mem_bw->nbandwidths; i++) {
+ if (alloc->mem_bw->bandwidths[i]) {
+ virBufferAsprintf(buf, "%zd=%u;", i,
+ *(alloc->mem_bw->bandwidths[i]));
+ }
+ }
+
+ virBufferTrim(buf, ";", 1);
+ virBufferAddChar(buf, '\n');
+ if (virBufferCheckError(buf) < 0)
+ return -1;
+ else
+ return 0;
+}
+
+
+static int
+virResctrlAllocMemoryBandwidth(virResctrlInfoPtr resctrl,
+ virResctrlAllocPtr alloc,
+ virResctrlAllocPtr free)
+{
+ size_t i;
+ virResctrlAllocMemBWPtr mem_bw_alloc = alloc->mem_bw;
+ virResctrlAllocMemBWPtr mem_bw_free = free->mem_bw;
+ virResctrlInfoMemBWPtr mem_bw_info = resctrl->membw_info;
+
+ if (!mem_bw_alloc)
+ return 0;
+
+ if (mem_bw_alloc && !mem_bw_info) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("RDT Memory Bandwidth allocation "
+ "unsupported"));
+ return -1;
+ }
+
+ for (i = 0; i < mem_bw_alloc->nbandwidths; i++) {
+ if (!mem_bw_alloc->bandwidths[i])
+ continue;
+
+ if (*(mem_bw_alloc->bandwidths[i]) % mem_bw_info->bandwidth_granularity) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Memory Bandwidth allocation of size "
+ "%u is not divisible by granularity %u"),
+ *(mem_bw_alloc->bandwidths[i]),
+ mem_bw_info->bandwidth_granularity);
+ return -1;
+ }
+ if (*(mem_bw_alloc->bandwidths[i]) < mem_bw_info->min_bandwidth) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Memory Bandwidth allocation of size "
+ "%u is smaller than the minimum "
+ "allowed allocation %u"),
+ *(mem_bw_alloc->bandwidths[i]),
+ mem_bw_info->min_bandwidth);
+ return -1;
+ }
+ if (i > mem_bw_info->max_id) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("bandwidth controller %zd not exist, "
+ "max controller id %u"),
+ i, mem_bw_info->max_id);
+ return -1;
+ }
+ if (*(mem_bw_alloc->bandwidths[i]) > *(mem_bw_free->bandwidths[i])) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Not enough room for allocation of %u%% "
+ "bandwidth on node %zd, available bandwidth %u%%"),
+ *(mem_bw_alloc->bandwidths[i]), i,
+ *(mem_bw_free->bandwidths[i]));
+ return -1;
+ }
+ }
+ return 0;
+}
+
+
+static int
+virResctrlAllocParseProcessMemoryBandwidth(virResctrlInfoPtr resctrl,
+ virResctrlAllocPtr alloc,
+ char *mem_bw)
+{
+ unsigned int bandwidth;
+ unsigned int id;
+ char *tmp = NULL;
+
+ tmp = strchr(mem_bw, '=');
+ if (!tmp)
+ return 0;
+ *tmp = '\0';
+ tmp++;
+
+ if (virStrToLong_uip(mem_bw, NULL, 10, &id) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Invalid node id %u "), id);
+ return -1;
+ }
+ if (virStrToLong_uip(tmp, NULL, 10, &bandwidth) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Invalid bandwidth %u"), bandwidth);
+ return -1;
+ }
+ if (bandwidth < resctrl->membw_info->min_bandwidth ||
+ id > resctrl->membw_info->max_id) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Missing or inconsistent resctrl info for "
+ "memory bandwidth node '%u'"), id);
+ return -1;
+ }
+ if (alloc->mem_bw->nbandwidths <= id &&
+ VIR_EXPAND_N(alloc->mem_bw->bandwidths, alloc->mem_bw->nbandwidths,
+ id - alloc->mem_bw->nbandwidths + 1) < 0) {
+ return -1;
+ }
+ if (!alloc->mem_bw->bandwidths[id]) {
+ if (VIR_ALLOC(alloc->mem_bw->bandwidths[id]) < 0)
+ return -1;
+ }
+
+ *(alloc->mem_bw->bandwidths[id]) = bandwidth;
+ return 0;
+}
+
+
+static int
+virResctrlAllocParseMemoryBandwidthLine(virResctrlInfoPtr resctrl,
+ virResctrlAllocPtr alloc,
+ char *line)
+{
+ char **mbs = NULL;
+ char *tmp = NULL;
+ size_t nmbs = 0;
+ size_t i;
+ int ret = -1;
+
+ /* For no reason there can be spaces */
+ virSkipSpaces((const char **) &line);
+
+ if (STRNEQLEN(line, "MB", 2))
+ return 0;
+
+ if (!resctrl || !resctrl->membw_info ||
+ !resctrl->membw_info->min_bandwidth ||
+ !resctrl->membw_info->bandwidth_granularity) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Missing or inconsistent resctrl info for "
+ "memory bandwidth allocation"));
+ }
+
+ if (!alloc->mem_bw) {
+ if (VIR_ALLOC(alloc->mem_bw) < 0)
+ return -1;
+ }
+
+ tmp = strchr(line, ':');
+ if (!tmp)
+ return 0;
+ tmp++;
+
+ mbs = virStringSplitCount(tmp, ";", 0, &nmbs);
+ if (nmbs == 0)
+ return 0;
+
+ for (i = 0; i < nmbs; i++) {
+ if (virResctrlAllocParseProcessMemoryBandwidth(resctrl, alloc, mbs[i]) < 0)
+ goto cleanup;
+ }
+ ret = 0;
+ cleanup:
+ virStringListFree(mbs);
+ return ret;
+}
+
+
static int
virResctrlAllocFormatCache(virResctrlAllocPtr alloc, virBufferPtr buf)
{
@@ -1013,6 +1305,11 @@ virResctrlAllocFormat(virResctrlAllocPtr alloc)
return NULL;
}
+ if (virResctrlAllocMemoryBandwidthFormat(alloc, &buf) < 0) {
+ virBufferFreeAndReset(&buf);
+ return NULL;
+ }
+
return virBufferContentAndReset(&buf);
}
@@ -1139,6 +1436,8 @@ virResctrlAllocParse(virResctrlInfoPtr resctrl,
lines = virStringSplitCount(schemata, "\n", 0, &nlines);
for (i = 0; i < nlines; i++) {
+ if (virResctrlAllocParseMemoryBandwidthLine(resctrl, alloc, lines[i]) < 0)
+ goto cleanup;
if (virResctrlAllocParseCacheLine(resctrl, alloc, lines[i]) < 0)
goto cleanup;
}
@@ -1273,6 +1572,22 @@ virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
}
}
+ /* set default free memory bandwidth to 100%*/
+ if (info->membw_info) {
+ if (VIR_ALLOC(ret->mem_bw) < 0)
+ goto error;
+
+ if (VIR_EXPAND_N(ret->mem_bw->bandwidths, ret->mem_bw->nbandwidths,
+ info->membw_info->max_id + 1) < 0)
+ goto error;
+
+ for (i = 0; i < ret->mem_bw->nbandwidths; i++) {
+ if (VIR_ALLOC(ret->mem_bw->bandwidths[i]) < 0)
+ goto error;
+ *(ret->mem_bw->bandwidths[i]) = 100;
+ }
+ }
+
cleanup:
virBitmapFree(mask);
return ret;
@@ -1284,13 +1599,14 @@ virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
/*
* This function creates an allocation that represents all unused parts of all
- * caches in the system. It uses virResctrlInfo for creating a new full
- * allocation with all bits set (using virResctrlAllocNewFromInfo()) and then
- * scans for all allocations under /sys/fs/resctrl and subtracts each one of
- * them from it. That way it can then return an allocation with only bit set
- * being those that are not mentioned in any other allocation. It is used for
- * two things, a) calculating the masks when creating allocations and b) from
- * tests.
+ * caches and memory bandwidth in the system. It uses virResctrlInfo for
+ * creating a new full allocation with all bits set (using
+ * virResctrlAllocNewFromInfo()), memory bandwidth 100% and then scans
+ * for all allocations under /sys/fs/resctrl and subtracts each one of them
+ * from it. That way it can then return an allocation with only bit set
+ * being those that are not mentioned in any other allocation for CAT and
+ * available memory bandwidth for MBA. It is used for two things, a) calculating
+ * the masks and bandwidth available when creating allocations and b) from tests.
*/
virResctrlAllocPtr
virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
@@ -1336,6 +1652,7 @@ virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
goto error;
}
+ virResctrlMemoryBandwidthSubtract(ret, alloc);
virResctrlAllocSubtract(ret, alloc);
virObjectUnref(alloc);
alloc = NULL;
@@ -1526,8 +1843,8 @@ virResctrlAllocCopyMasks(virResctrlAllocPtr dst,
/*
* This function is called when creating an allocation in the system. What it
- * does is that it gets all the unused bits using virResctrlAllocGetUnused() and
- * then tries to find a proper space for every requested allocation effectively
+ * does is that it gets all the unused resources using virResctrlAllocGetUnused()
+ * and then tries to find a proper space for every requested allocation effectively
* transforming `sizes` into `masks`.
*/
static int
@@ -1547,6 +1864,9 @@ virResctrlAllocAssign(virResctrlInfoPtr resctrl,
if (!alloc_default)
goto cleanup;
+ if (virResctrlAllocMemoryBandwidth(resctrl, alloc, alloc_free) < 0)
+ goto cleanup;
+
if (virResctrlAllocCopyMasks(alloc, alloc_default) < 0)
goto cleanup;
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index d657c06..d43fd31 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -73,6 +73,10 @@ typedef int virResctrlAllocForeachCacheCallback(unsigned int level,
unsigned long long size,
void *opaque);
+typedef int virResctrlAllocForeachMemoryCallback(unsigned int id,
+ unsigned int size,
+ void *opaque);
+
virResctrlAllocPtr
virResctrlAllocNew(void);
@@ -85,6 +89,15 @@ virResctrlAllocSetCacheSize(virResctrlAllocPtr alloc,
virCacheType type,
unsigned int cache,
unsigned long long size);
+int
+virResctrlAllocForeachMemory(virResctrlAllocPtr resctrl,
+ virResctrlAllocForeachMemoryCallback cb,
+ void *opaque);
+
+int
+virResctrlSetMemoryBandwidth(virResctrlAllocPtr alloc,
+ unsigned int id,
+ unsigned int memory_bandwidth);
int
virResctrlAllocForeachCache(virResctrlAllocPtr alloc,
--
2.7.4
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
On 07/18/2018 03:57 AM, bing.niu@intel.com wrote:
> From: Bing Niu <bing.niu@intel.com>
>
> Add memory bandwidth allocation support to virresctrl class.
> Introducing virResctrlAllocMemBW which is used for allocating memory
> bandwidth. Following virResctrlAllocPerType, it also employs a
> nested sparse array to indicate whether allocation is available for
> particular last level cache.
>
> Signed-off-by: Bing Niu <bing.niu@intel.com>
> ---
> src/util/virresctrl.c | 346 ++++++++++++++++++++++++++++++++++++++++++++++++--
> src/util/virresctrl.h | 13 ++
> 2 files changed, 346 insertions(+), 13 deletions(-)
>
> diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
> index 06e2702..bec2afd 100644
> --- a/src/util/virresctrl.c
> +++ b/src/util/virresctrl.c
> @@ -36,9 +36,9 @@ VIR_LOG_INIT("util.virresctrl")
>
>
> /* Resctrl is short for Resource Control. It might be implemented for various
> - * resources, but at the time of this writing this is only supported for cache
> - * allocation technology (aka CAT). Hence the reson for leaving 'Cache' out of
> - * all the structure and function names for now (can be added later if needed.
> + * resources. Currently this supports cache allocation technology (aka CAT) and
> + * memory bandwidth allocation (aka MBA). More resources technologies may be
> + * added in feature.
s/feature/the future/
> */
>
>
> @@ -89,6 +89,8 @@ typedef virResctrlAllocPerType *virResctrlAllocPerTypePtr;
> typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
> typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr;
>
> +typedef struct _virResctrlAllocMemBW virResctrlAllocMemBW;
> +typedef virResctrlAllocMemBW *virResctrlAllocMemBWPtr;
>
> /* Class definitions and initializations */
> static virClassPtr virResctrlInfoClass;
> @@ -181,7 +183,10 @@ virResctrlInfoDispose(void *obj)
> * consequently a directory under /sys/fs/resctrl). Since it can have multiple
> * parts of multiple caches allocated it is represented as bunch of nested
> * sparse arrays (by sparse I mean array of pointers so that each might be NULL
> - * in case there is no allocation for that particular one (level, cache, ...)).
> + * in case there is no allocation for that particular cache allocation (level,
> + * cache, ...) or memory allocation for particular node).
> + *
> + * =====Cache allocation technology (CAT)=====
> *
> * Since one allocation can be made for caches on different levels, the first
> * nested sparse array is of types virResctrlAllocPerLevel. For example if you
> @@ -206,6 +211,16 @@ virResctrlInfoDispose(void *obj)
> * all of them. While doing that we store the bitmask in a sparse array of
> * virBitmaps named `masks` indexed the same way as `sizes`. The upper bounds
> * of the sparse arrays are stored in nmasks or nsizes, respectively.
> + *
> + * =====Memory Bandwidth allocation technology (MBA)=====
> + *
> + * The memory bandwidth allocation support in virResctrlAlloc works in the same
> + * fashion as CAT. However, memory bandwidth controller doesn't have a hierarchy
> + * organization as cache, each node have one memory bandwidth controller to
> + * memory bandwidth distribution. The number of memory bandwidth controller is
> + * identical with number of last level cache. So MBA also employs a sparse array
> + * to represent whether a memory bandwidth allocation happens on corresponding node.
> + * The available memory controller number is collected in 'virResctrlInfo'.
> */
> struct _virResctrlAllocPerType {
> /* There could be bool saying whether this is set or not, but since everything
> @@ -226,12 +241,24 @@ struct _virResctrlAllocPerLevel {
> * VIR_CACHE_TYPE_LAST number of items */
> };
>
> +/*
> + * virResctrlAllocMemBW represents one memory bandwidth allocation. Since it can have
> + * several last level caches in a NUMA system, it is also represented as a nested
> + * sparse arrays as virRestrlAllocPerLevel.
> + */
> +struct _virResctrlAllocMemBW {
> + unsigned int **bandwidths;
> + size_t nbandwidths;
> +};
> +
> struct _virResctrlAlloc {
> virObject parent;
>
> virResctrlAllocPerLevelPtr *levels;
> size_t nlevels;
>
> + virResctrlAllocMemBWPtr mem_bw;
> +
> /* The identifier (any unique string for now) */
> char *id;
> /* libvirt-generated path in /sys/fs/resctrl for this particular
> @@ -275,6 +302,13 @@ virResctrlAllocDispose(void *obj)
> VIR_FREE(level);
> }
>
> + if (alloc->mem_bw) {
> + virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
> + for (i = 0; i < mem_bw->nbandwidths; i++)
> + VIR_FREE(mem_bw->bandwidths[i]);
> + }
> +
> + VIR_FREE(alloc->mem_bw);
NIT: Could be inside the if condition
> VIR_FREE(alloc->id);
> VIR_FREE(alloc->path);
> VIR_FREE(alloc->levels);
> @@ -697,6 +731,9 @@ virResctrlAllocIsEmpty(virResctrlAllocPtr alloc)
> if (!alloc)
> return true;
>
> + if (alloc->mem_bw)
> + return false;
> +
> for (i = 0; i < alloc->nlevels; i++) {
> virResctrlAllocPerLevelPtr a_level = alloc->levels[i];
>
> @@ -890,6 +927,27 @@ virResctrlAllocSetCacheSize(virResctrlAllocPtr alloc,
>
>
> int
> +virResctrlAllocForeachMemory(virResctrlAllocPtr alloc,
> + virResctrlAllocForeachMemoryCallback cb,
> + void *opaque)
> +{
> + size_t i = 0;
> +
> + if (!alloc)
> + return 0;
> +
> + if (alloc->mem_bw) {
> + virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
> + for (i = 0; i < mem_bw->nbandwidths; i++)
> + if (mem_bw->bandwidths[i])
> + cb(i, *mem_bw->bandwidths[i], opaque);
@cb is an int function, but only ever used in what amounts to a void
function in patch8 (virDomainMemorytuneDefFormatHelper - although
defined as int, it only ever returns 0, so it could be void too).
So either this changes to handle that or we change the *Callback to be
void. Do you have a preference?
> + }
> +
> + return 0;
> +}
> +
> +
> +int
> virResctrlAllocForeachCache(virResctrlAllocPtr alloc,
> virResctrlAllocForeachCacheCallback cb,
> void *opaque)
> @@ -952,6 +1010,240 @@ virResctrlAllocGetID(virResctrlAllocPtr alloc)
> }
>
>
The next hunk of changes has perhaps more to do with the functionality
of the code vs. the pure alloc/free of the structures. I think that
alloc/free needs to be split from Parse/Format - it'll make things
easier. Then there's Parse of XML vs Parse of the schemata for the MB:
lines which just jumbles things (in my mind ;-))
This particular change may even be a 3rd patch... Keep reading ;-)
> +static void
> +virResctrlMemoryBandwidthSubtract(virResctrlAllocPtr free,
> + virResctrlAllocPtr used)
> +{
> + size_t i;
> +
> + if (!used->mem_bw)
> + return;
> +
> + for (i = 0; i < used->mem_bw->nbandwidths; i++) {
> + if (used->mem_bw->bandwidths[i])
> + *(free->mem_bw->bandwidths[i]) -= *(used->mem_bw->bandwidths[i]);
> + }
> +}
> +
> +
Might be nice to put some comments here. Since this is a backend of
sorts for the XML Parse API from patch8, let's wait to introduce it
there. That is it moves to patch8.
> +int
> +virResctrlSetMemoryBandwidth(virResctrlAllocPtr alloc,
> + unsigned int id,
> + unsigned int memory_bandwidth)
> +{
> + virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
> +
> + if (!mem_bw) {
> + if (VIR_ALLOC(mem_bw) < 0)
> + return -1;
> + alloc->mem_bw = mem_bw;
> + }
> +
> + if (mem_bw->nbandwidths <= id &&
> + VIR_EXPAND_N(mem_bw->bandwidths, mem_bw->nbandwidths,
> + id - mem_bw->nbandwidths + 1) < 0)
> + return -1;
> +
> + if (mem_bw->bandwidths[id]) {
> + virReportError(VIR_ERR_XML_ERROR,
> + _("Memory Bandwidth already defined for node %u"),
> + id);
> + return -1;
> + }
> +
> + if (VIR_ALLOC(mem_bw->bandwidths[id]) < 0)
> + return -1;
> +
> + *(mem_bw->bandwidths[id]) = memory_bandwidth;
> + return 0;
> +}
> +
> +
Seeing Format and Parse API's was confusing until I remembered that
these are for the schemata files. I think perhaps this section could
have been commented a bit more to indicate what it's for.
> +static int
> +virResctrlAllocMemoryBandwidthFormat(virResctrlAllocPtr alloc,
> + virBufferPtr buf)
> +{
> + size_t i;
> +
> + if (!alloc->mem_bw)
> + return 0;
> +
> + virBufferAddLit(buf, "MB:");
> +
> + for (i = 0; i < alloc->mem_bw->nbandwidths; i++) {
> + if (alloc->mem_bw->bandwidths[i]) {
> + virBufferAsprintf(buf, "%zd=%u;", i,
> + *(alloc->mem_bw->bandwidths[i]));
> + }
> + }
> +
> + virBufferTrim(buf, ";", 1);
> + virBufferAddChar(buf, '\n');
> + if (virBufferCheckError(buf) < 0)
> + return -1;
> + else
> + return 0;
return virBufferCheckError(buf);
> +}
> +
> +
This API is the backend of the virResctrlAllocAssign called during the
qemuProcessResctrlCreate processing - so it may be a third patch that
can be extracted from this one. I need to look a bit harder though.
Similarly the *Subtract API may fall into this category.
> +static int
> +virResctrlAllocMemoryBandwidth(virResctrlInfoPtr resctrl,
> + virResctrlAllocPtr alloc,
> + virResctrlAllocPtr free)
> +{
> + size_t i;
> + virResctrlAllocMemBWPtr mem_bw_alloc = alloc->mem_bw;
> + virResctrlAllocMemBWPtr mem_bw_free = free->mem_bw;
> + virResctrlInfoMemBWPtr mem_bw_info = resctrl->membw_info;
> +
> + if (!mem_bw_alloc)
> + return 0;
So can we even have one of these if resctrl->membw_info == NULL? The
->mem_bw is allocated in virResctrlSetMemoryBandwidth, which is only
defined, but not called. It will be called later in patch8 from a Parse
API I see.
> +
> + if (mem_bw_alloc && !mem_bw_info) {
In virResctrlGetMemoryBandwidthInfo it's "OK" if bandwidth_gran is not
present - a VIR_INFO is generated, the resctrl->membw_info is left as
NULL, and we return 0.
So failing here would seemingly be quite unexpected, wouldn't it?
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("RDT Memory Bandwidth allocation "
> + "unsupported"));
> + return -1;
> + }
> +
> + for (i = 0; i < mem_bw_alloc->nbandwidths; i++) {
> + if (!mem_bw_alloc->bandwidths[i])
> + continue;
> +
> + if (*(mem_bw_alloc->bandwidths[i]) % mem_bw_info->bandwidth_granularity) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Memory Bandwidth allocation of size "
> + "%u is not divisible by granularity %u"),
> + *(mem_bw_alloc->bandwidths[i]),
> + mem_bw_info->bandwidth_granularity);
> + return -1;
> + }
> + if (*(mem_bw_alloc->bandwidths[i]) < mem_bw_info->min_bandwidth) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Memory Bandwidth allocation of size "
> + "%u is smaller than the minimum "
> + "allowed allocation %u"),
> + *(mem_bw_alloc->bandwidths[i]),
> + mem_bw_info->min_bandwidth);
> + return -1;
> + }
> + if (i > mem_bw_info->max_id) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("bandwidth controller %zd not exist, "
s/%zd/id %zd/
s/not/does not/
> + "max controller id %u"),
> + i, mem_bw_info->max_id);
> + return -1;
> + }
> + if (*(mem_bw_alloc->bandwidths[i]) > *(mem_bw_free->bandwidths[i])) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Not enough room for allocation of %u%% "
> + "bandwidth on node %zd, available bandwidth %u%%"),
> + *(mem_bw_alloc->bandwidths[i]), i,
> + *(mem_bw_free->bandwidths[i]));
> + return -1;
> + }
> + }
So this is just a Check or Validate type function? Should it be renamed
or should it actually do something.
> + return 0;
> +}
> +
> +
More stuff for the schemata parse/format
> +static int
> +virResctrlAllocParseProcessMemoryBandwidth(virResctrlInfoPtr resctrl,
> + virResctrlAllocPtr alloc,
> + char *mem_bw)
> +{
> + unsigned int bandwidth;
> + unsigned int id;
> + char *tmp = NULL;
> +
> + tmp = strchr(mem_bw, '=');
> + if (!tmp)
> + return 0;
> + *tmp = '\0';
> + tmp++;
> +
> + if (virStrToLong_uip(mem_bw, NULL, 10, &id) < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Invalid node id %u "), id);
> + return -1;
> + }
> + if (virStrToLong_uip(tmp, NULL, 10, &bandwidth) < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Invalid bandwidth %u"), bandwidth);
> + return -1;
> + }
> + if (bandwidth < resctrl->membw_info->min_bandwidth ||
> + id > resctrl->membw_info->max_id) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Missing or inconsistent resctrl info for "
> + "memory bandwidth node '%u'"), id);
> + return -1;
> + }
> + if (alloc->mem_bw->nbandwidths <= id &&
> + VIR_EXPAND_N(alloc->mem_bw->bandwidths, alloc->mem_bw->nbandwidths,
> + id - alloc->mem_bw->nbandwidths + 1) < 0) {
> + return -1;
> + }
> + if (!alloc->mem_bw->bandwidths[id]) {
> + if (VIR_ALLOC(alloc->mem_bw->bandwidths[id]) < 0)
> + return -1;
> + }
> +
> + *(alloc->mem_bw->bandwidths[id]) = bandwidth;
> + return 0;
> +}
> +
> +
Ditto for schemata parse/format...
> +static int
> +virResctrlAllocParseMemoryBandwidthLine(virResctrlInfoPtr resctrl,
> + virResctrlAllocPtr alloc,
> + char *line)
> +{
> + char **mbs = NULL;
> + char *tmp = NULL;
> + size_t nmbs = 0;
> + size_t i;
> + int ret = -1;
> +
> + /* For no reason there can be spaces */
> + virSkipSpaces((const char **) &line);
> +
> + if (STRNEQLEN(line, "MB", 2))
> + return 0;
> +
> + if (!resctrl || !resctrl->membw_info ||
> + !resctrl->membw_info->min_bandwidth ||
> + !resctrl->membw_info->bandwidth_granularity) {
These checks seem out of place. If we get this far without !resctrl?
Wow... And erroring out if !resctrl->membw_info even though
virResctrlGetMemoryBandwidthInfo can succeed without it.
As for the min_bandwidth and bandwidth_granularity checks - those would
seem to be more appropriate in patch4 wouldn't they? when the values are
read... If they're read as zero, then I assume that's bad ;-).
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Missing or inconsistent resctrl info for "
> + "memory bandwidth allocation"));
> + }
> +
> + if (!alloc->mem_bw) {
> + if (VIR_ALLOC(alloc->mem_bw) < 0)
> + return -1;
> + }
> +
> + tmp = strchr(line, ':');
> + if (!tmp)
> + return 0;
> + tmp++;
> +
> + mbs = virStringSplitCount(tmp, ";", 0, &nmbs);
> + if (nmbs == 0)
> + return 0;
> +
> + for (i = 0; i < nmbs; i++) {
> + if (virResctrlAllocParseProcessMemoryBandwidth(resctrl, alloc, mbs[i]) < 0)
> + goto cleanup;
> + }
> + ret = 0;
> + cleanup:
> + virStringListFree(mbs);
> + return ret;
> +}
> +
> +
> static int
> virResctrlAllocFormatCache(virResctrlAllocPtr alloc, virBufferPtr buf)
> {
> @@ -1013,6 +1305,11 @@ virResctrlAllocFormat(virResctrlAllocPtr alloc)
> return NULL;
> }
>
For schemata parse/format
> + if (virResctrlAllocMemoryBandwidthFormat(alloc, &buf) < 0) {
> + virBufferFreeAndReset(&buf);
> + return NULL;
> + }
> +
> return virBufferContentAndReset(&buf);
> }
>
> @@ -1139,6 +1436,8 @@ virResctrlAllocParse(virResctrlInfoPtr resctrl,
>
> lines = virStringSplitCount(schemata, "\n", 0, &nlines);
> for (i = 0; i < nlines; i++) {
For schemata parse/format
> + if (virResctrlAllocParseMemoryBandwidthLine(resctrl, alloc, lines[i]) < 0)
> + goto cleanup;
> if (virResctrlAllocParseCacheLine(resctrl, alloc, lines[i]) < 0)
> goto cleanup;
> }
> @@ -1273,6 +1572,22 @@ virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
> }
> }
>
> + /* set default free memory bandwidth to 100%*/
> + if (info->membw_info) {
> + if (VIR_ALLOC(ret->mem_bw) < 0)
> + goto error;
> +
> + if (VIR_EXPAND_N(ret->mem_bw->bandwidths, ret->mem_bw->nbandwidths,
> + info->membw_info->max_id + 1) < 0)
> + goto error;
> +
> + for (i = 0; i < ret->mem_bw->nbandwidths; i++) {
> + if (VIR_ALLOC(ret->mem_bw->bandwidths[i]) < 0)
> + goto error;
> + *(ret->mem_bw->bandwidths[i]) = 100;
> + }
> + }
> +
> cleanup:
> virBitmapFree(mask);
> return ret;
> @@ -1284,13 +1599,14 @@ virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
>
> /*
> * This function creates an allocation that represents all unused parts of all
> - * caches in the system. It uses virResctrlInfo for creating a new full
> - * allocation with all bits set (using virResctrlAllocNewFromInfo()) and then
> - * scans for all allocations under /sys/fs/resctrl and subtracts each one of
> - * them from it. That way it can then return an allocation with only bit set
> - * being those that are not mentioned in any other allocation. It is used for
> - * two things, a) calculating the masks when creating allocations and b) from
> - * tests.
> + * caches and memory bandwidth in the system. It uses virResctrlInfo for
> + * creating a new full allocation with all bits set (using
> + * virResctrlAllocNewFromInfo()), memory bandwidth 100% and then scans
> + * for all allocations under /sys/fs/resctrl and subtracts each one of them
> + * from it. That way it can then return an allocation with only bit set
> + * being those that are not mentioned in any other allocation for CAT and
> + * available memory bandwidth for MBA. It is used for two things, a) calculating
> + * the masks and bandwidth available when creating allocations and b) from tests.
> */
> virResctrlAllocPtr
> virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
> @@ -1336,6 +1652,7 @@ virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
> goto error;
> }
>
Need to think about this, but perhaps would be appropriate for schemata
format/parse, but could be something for that 3rd patch out of this one.
> + virResctrlMemoryBandwidthSubtract(ret, alloc);
> virResctrlAllocSubtract(ret, alloc);
> virObjectUnref(alloc);
> alloc = NULL;
> @@ -1526,8 +1843,8 @@ virResctrlAllocCopyMasks(virResctrlAllocPtr dst,
>
> /*
> * This function is called when creating an allocation in the system. What it
> - * does is that it gets all the unused bits using virResctrlAllocGetUnused() and
> - * then tries to find a proper space for every requested allocation effectively
> + * does is that it gets all the unused resources using virResctrlAllocGetUnused()
> + * and then tries to find a proper space for every requested allocation effectively
> * transforming `sizes` into `masks`.
> */
> static int
> @@ -1547,6 +1864,9 @@ virResctrlAllocAssign(virResctrlInfoPtr resctrl,
> if (!alloc_default)
> goto cleanup;
>
Still need to come to grips with the 'best place' for this - perhaps a
3rd patch.
> + if (virResctrlAllocMemoryBandwidth(resctrl, alloc, alloc_free) < 0)
> + goto cleanup;
> +
> if (virResctrlAllocCopyMasks(alloc, alloc_default) < 0)
> goto cleanup;
>
Let's see whether I can help you split these up a bit. There's a few
things to answer from what I looked at. So far nothing is necessarily
wrong, it's just the splitting of things that makes it easier to think
about. The split you did has certainly helped thus far though.
John
I'll keep plugging away tomorrow.
> diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
> index d657c06..d43fd31 100644
> --- a/src/util/virresctrl.h
> +++ b/src/util/virresctrl.h
> @@ -73,6 +73,10 @@ typedef int virResctrlAllocForeachCacheCallback(unsigned int level,
> unsigned long long size,
> void *opaque);
>
> +typedef int virResctrlAllocForeachMemoryCallback(unsigned int id,
> + unsigned int size,
> + void *opaque);
> +
> virResctrlAllocPtr
> virResctrlAllocNew(void);
>
> @@ -85,6 +89,15 @@ virResctrlAllocSetCacheSize(virResctrlAllocPtr alloc,
> virCacheType type,
> unsigned int cache,
> unsigned long long size);
> +int
> +virResctrlAllocForeachMemory(virResctrlAllocPtr resctrl,
> + virResctrlAllocForeachMemoryCallback cb,
> + void *opaque);
> +
> +int
> +virResctrlSetMemoryBandwidth(virResctrlAllocPtr alloc,
> + unsigned int id,
> + unsigned int memory_bandwidth);
>
> int
> virResctrlAllocForeachCache(virResctrlAllocPtr alloc,
>
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
On 2018年07月26日 06:37, John Ferlan wrote:
>
>
> On 07/18/2018 03:57 AM, bing.niu@intel.com wrote:
>> From: Bing Niu <bing.niu@intel.com>
>>
>> Add memory bandwidth allocation support to virresctrl class.
>> Introducing virResctrlAllocMemBW which is used for allocating memory
>> bandwidth. Following virResctrlAllocPerType, it also employs a
>> nested sparse array to indicate whether allocation is available for
>> particular last level cache.
>>
>> Signed-off-by: Bing Niu <bing.niu@intel.com>
>> ---
>> src/util/virresctrl.c | 346 ++++++++++++++++++++++++++++++++++++++++++++++++--
>> src/util/virresctrl.h | 13 ++
>> 2 files changed, 346 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
>> index 06e2702..bec2afd 100644
>> --- a/src/util/virresctrl.c
>> +++ b/src/util/virresctrl.c
>> @@ -36,9 +36,9 @@ VIR_LOG_INIT("util.virresctrl")
>>
>>
>> /* Resctrl is short for Resource Control. It might be implemented for various
>> - * resources, but at the time of this writing this is only supported for cache
>> - * allocation technology (aka CAT). Hence the reson for leaving 'Cache' out of
>> - * all the structure and function names for now (can be added later if needed.
>> + * resources. Currently this supports cache allocation technology (aka CAT) and
>> + * memory bandwidth allocation (aka MBA). More resources technologies may be
>> + * added in feature.
>
> s/feature/the future/
Will change.
>
>> */
>>
[....]
>> virObject parent;
>>
>> virResctrlAllocPerLevelPtr *levels;
>> size_t nlevels;
>>
>> + virResctrlAllocMemBWPtr mem_bw;
>> +
>> /* The identifier (any unique string for now) */
>> char *id;
>> /* libvirt-generated path in /sys/fs/resctrl for this particular
>> @@ -275,6 +302,13 @@ virResctrlAllocDispose(void *obj)
>> VIR_FREE(level);
>> }
>>
>> + if (alloc->mem_bw) {
>> + virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
>> + for (i = 0; i < mem_bw->nbandwidths; i++)
>> + VIR_FREE(mem_bw->bandwidths[i]);
>> + }
>> +
>> + VIR_FREE(alloc->mem_bw);
>
> NIT: Could be inside the if condition.
OK~
>
>> VIR_FREE(alloc->id);
>> VIR_FREE(alloc->path);
>> VIR_FREE(alloc->levels);
>> @@ -697,6 +731,9 @@ virResctrlAllocIsEmpty(virResctrlAllocPtr alloc)
>> if (!alloc)
>> return true;
>>
>> + if (alloc->mem_bw)
>> + return false;
>> +
>> for (i = 0; i < alloc->nlevels; i++) {
>> virResctrlAllocPerLevelPtr a_level = alloc->levels[i];
>>
>> @@ -890,6 +927,27 @@ virResctrlAllocSetCacheSize(virResctrlAllocPtr alloc,
>>
>>
>> int
>> +virResctrlAllocForeachMemory(virResctrlAllocPtr alloc,
>> + virResctrlAllocForeachMemoryCallback cb,
>> + void *opaque)
>> +{
>> + size_t i = 0;
>> +
>> + if (!alloc)
>> + return 0;
>> +
>> + if (alloc->mem_bw) {
>> + virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
>> + for (i = 0; i < mem_bw->nbandwidths; i++)
>> + if (mem_bw->bandwidths[i])
>> + cb(i, *mem_bw->bandwidths[i], opaque);
>
> @cb is an int function, but only ever used in what amounts to a void
> function in patch8 (virDomainMemorytuneDefFormatHelper - although
> defined as int, it only ever returns 0, so it could be void too).
>
> So either this changes to handle that or we change the *Callback to be
> void. Do you have a preference?
Let's add a return value testing in patch8.
I think we should change cachetune virDomainCachetuneDefFormatHelper
part also. Since it also doesn't check return value of
virResctrlAllocForeachCache.
IMO, Giving a return value of function is always a good practice. ;)
It's also give some spaces for the future extension. If logic inside @cb
became complex. :)
>
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +
>> +int
>> virResctrlAllocForeachCache(virResctrlAllocPtr alloc,
>> virResctrlAllocForeachCacheCallback cb,
>> void *opaque)
>> @@ -952,6 +1010,240 @@ virResctrlAllocGetID(virResctrlAllocPtr alloc)
>> }
>>
>>
>
> The next hunk of changes has perhaps more to do with the functionality
> of the code vs. the pure alloc/free of the structures. I think that
> alloc/free needs to be split from Parse/Format - it'll make things
> easier. Then there's Parse of XML vs Parse of the schemata for the MB:
> lines which just jumbles things (in my mind ;-))
>
> This particular change may even be a 3rd patch... Keep reading ;-)
Yes. I just realize put everything relating with ResctrlAlloc into one
patch will give difficulties to the reviewer. I should split this patch.
So how about we split this patch into:
1. introduce virResctrlAllocMemBW and its alloc and free part
2. introduce schemata file parsing part for MBA.
3. introduce functions used to calculate how much memory bandwidth is
used (that is find all groups already create)
4. introduce interface function used to set memory bandwidth
(virResctrlSetMemoryBandwidth).
5. introduce logic create resctrl group with memory bandwidth.
>
>> +static void
>> +virResctrlMemoryBandwidthSubtract(virResctrlAllocPtr free,
>> + virResctrlAllocPtr used)
>> +{
>> + size_t i;
>> +
>> + if (!used->mem_bw)
>> + return;
>> +
>> + for (i = 0; i < used->mem_bw->nbandwidths; i++) {
>> + if (used->mem_bw->bandwidths[i])
>> + *(free->mem_bw->bandwidths[i]) -= *(used->mem_bw->bandwidths[i]);
>> + }
>> +}
>> +
>> +
>
> Might be nice to put some comments here. Since this is a backend of
> sorts for the XML Parse API from patch8, let's wait to introduce it
> there. That is it moves to patch8.
I will add some comments here. And put into a separated patch.
Since patch8 is used to parse XML and create resctrl with MBA
information purely. So it's better we don't introduce this interface
function together with XML parse.
We can introduce interface first and wait our consumer patch8. :)
How do you think?
>
>> +int
>> +virResctrlSetMemoryBandwidth(virResctrlAllocPtr alloc,
>> + unsigned int id,
>> + unsigned int memory_bandwidth)
>> +{
>> + virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
>> +
>> + if (!mem_bw) {
>> + if (VIR_ALLOC(mem_bw) < 0)
>> + return -1;
>> + alloc->mem_bw = mem_bw;
>> + }
>> +
>> + if (mem_bw->nbandwidths <= id &&
>> + VIR_EXPAND_N(mem_bw->bandwidths, mem_bw->nbandwidths,
>> + id - mem_bw->nbandwidths + 1) < 0)
>> + return -1;
>> +
>> + if (mem_bw->bandwidths[id]) {
>> + virReportError(VIR_ERR_XML_ERROR,
>> + _("Memory Bandwidth already defined for node %u"),
>> + id);
>> + return -1;
>> + }
>> +
>> + if (VIR_ALLOC(mem_bw->bandwidths[id]) < 0)
>> + return -1;
>> +
>> + *(mem_bw->bandwidths[id]) = memory_bandwidth;
>> + return 0;
>> +}
>> +
>> +
>
> Seeing Format and Parse API's was confusing until I remembered that
> these are for the schemata files. I think perhaps this section could
> have been commented a bit more to indicate what it's for.
Will add comments and put this into a dedicated patch with other
schemata file parse and formating functions.
>
>> +static int
>> +virResctrlAllocMemoryBandwidthFormat(virResctrlAllocPtr alloc,
>> + virBufferPtr buf)
>> +{
>> + size_t i;
>> +
>> + if (!alloc->mem_bw)
>> + return 0;
>> +
>> + virBufferAddLit(buf, "MB:");
>> +
>> + for (i = 0; i < alloc->mem_bw->nbandwidths; i++) {
>> + if (alloc->mem_bw->bandwidths[i]) {
>> + virBufferAsprintf(buf, "%zd=%u;", i,
>> + *(alloc->mem_bw->bandwidths[i]));
>> + }
>> + }
>> +
>> + virBufferTrim(buf, ";", 1);
>> + virBufferAddChar(buf, '\n');
>> + if (virBufferCheckError(buf) < 0)
>> + return -1;
>> + else
>> + return 0;
>
> return virBufferCheckError(buf);
OK.
>
>> +}
>> +
>> +
>
> This API is the backend of the virResctrlAllocAssign called during the
> qemuProcessResctrlCreate processing - so it may be a third patch that
> can be extracted from this one. I need to look a bit harder though.
> Similarly the *Subtract API may fall into this category.
Yes~
>
>> +static int
>> +virResctrlAllocMemoryBandwidth(virResctrlInfoPtr resctrl,
>> + virResctrlAllocPtr alloc,
>> + virResctrlAllocPtr free)
>> +{
>> + size_t i;
>> + virResctrlAllocMemBWPtr mem_bw_alloc = alloc->mem_bw;
>> + virResctrlAllocMemBWPtr mem_bw_free = free->mem_bw;
>> + virResctrlInfoMemBWPtr mem_bw_info = resctrl->membw_info;
>> +
>> + if (!mem_bw_alloc)
>> + return 0;
>
> So can we even have one of these if resctrl->membw_info == NULL? The
> ->mem_bw is allocated in virResctrlSetMemoryBandwidth, which is only
> defined, but not called. It will be called later in patch8 from a Parse
> API I see.
Yes. If host doesn't have MBA feature, however someone try to allocate
memory bandwidth for a VM by mistake. In such a case, we will have
resctrl->membw_info == NULL && ->mem_bw != NULL
Then below check will throw error.
>
>> +
>> + if (mem_bw_alloc && !mem_bw_info) {
>
> In virResctrlGetMemoryBandwidthInfo it's "OK" if bandwidth_gran is not
> present - a VIR_INFO is generated, the resctrl->membw_info is left as
> NULL, and we return 0.
>
> So failing here would seemingly be quite unexpected, wouldn't it?
Yes! you are right. Maybe someone try to allocate memory bandwidth on a
host without MBA support.
>
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>> + _("RDT Memory Bandwidth allocation "
>> + "unsupported"));
>> + return -1;
>> + }
>> +
>> + for (i = 0; i < mem_bw_alloc->nbandwidths; i++) {
>> + if (!mem_bw_alloc->bandwidths[i])
>> + continue;
>> +
>> + if (*(mem_bw_alloc->bandwidths[i]) % mem_bw_info->bandwidth_granularity) {
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> + _("Memory Bandwidth allocation of size "
>> + "%u is not divisible by granularity %u"),
>> + *(mem_bw_alloc->bandwidths[i]),
>> + mem_bw_info->bandwidth_granularity);
>> + return -1;
>> + }
>> + if (*(mem_bw_alloc->bandwidths[i]) < mem_bw_info->min_bandwidth) {
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> + _("Memory Bandwidth allocation of size "
>> + "%u is smaller than the minimum "
>> + "allowed allocation %u"),
>> + *(mem_bw_alloc->bandwidths[i]),
>> + mem_bw_info->min_bandwidth);
>> + return -1;
>> + }
>> + if (i > mem_bw_info->max_id) {
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> + _("bandwidth controller %zd not exist, "
>
> s/%zd/id %zd/
>
> s/not/does not/
Will update.
>
>> + "max controller id %u"),
>> + i, mem_bw_info->max_id);
>> + return -1;
>> + }
>> + if (*(mem_bw_alloc->bandwidths[i]) > *(mem_bw_free->bandwidths[i])) {
>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> + _("Not enough room for allocation of %u%% "
>> + "bandwidth on node %zd, available bandwidth %u%%"),
>> + *(mem_bw_alloc->bandwidths[i]), i,
>> + *(mem_bw_free->bandwidths[i]));
>> + return -1;
>> + }
>> + }
>
> So this is just a Check or Validate type function? Should it be renamed
> or should it actually do something.
er.... from code, it seems just a validate function to grantee enough
memory bandwidth available. We can think this function is allocating
memory bandwidth from free_memory_bandwidth. But the
free_memory_bandwidth is calculated by 100 - sum(memory bandwidth of
existing resctrl group). So there is no any management structure used to
track free memory bandwidth as other "allocate" or "free" functions, eg
memory manange code.
Maybe function name need to be adjust. I am not good at naming :(. Do
you have a suggestion?
>
>> + return 0;
>> +}
>> +
>> +
>
> More stuff for the schemata parse/format
>
>> +static int
>> +virResctrlAllocParseProcessMemoryBandwidth(virResctrlInfoPtr resctrl,
>> + virResctrlAllocPtr alloc,
>> + char *mem_bw)
>> +{
>> + unsigned int bandwidth;
>> + unsigned int id;
>> + char *tmp = NULL;
>> +
>> + tmp = strchr(mem_bw, '=');
>> + if (!tmp)
>> + return 0;
>> + *tmp = '\0';
>> + tmp++;
>> +
>> + if (virStrToLong_uip(mem_bw, NULL, 10, &id) < 0) {
>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>> + _("Invalid node id %u "), id);
>> + return -1;
>> + }
>> + if (virStrToLong_uip(tmp, NULL, 10, &bandwidth) < 0) {
>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>> + _("Invalid bandwidth %u"), bandwidth);
>> + return -1;
>> + }
>> + if (bandwidth < resctrl->membw_info->min_bandwidth ||
>> + id > resctrl->membw_info->max_id) {
>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>> + _("Missing or inconsistent resctrl info for "
>> + "memory bandwidth node '%u'"), id);
>> + return -1;
>> + }
>> + if (alloc->mem_bw->nbandwidths <= id &&
>> + VIR_EXPAND_N(alloc->mem_bw->bandwidths, alloc->mem_bw->nbandwidths,
>> + id - alloc->mem_bw->nbandwidths + 1) < 0) {
>> + return -1;
>> + }
>> + if (!alloc->mem_bw->bandwidths[id]) {
>> + if (VIR_ALLOC(alloc->mem_bw->bandwidths[id]) < 0)
>> + return -1;
>> + }
>> +
>> + *(alloc->mem_bw->bandwidths[id]) = bandwidth;
>> + return 0;
>> +}
>> +
>> +
>
> Ditto for schemata parse/format...
>
>> +static int
>> +virResctrlAllocParseMemoryBandwidthLine(virResctrlInfoPtr resctrl,
>> + virResctrlAllocPtr alloc,
>> + char *line)
>> +{
>> + char **mbs = NULL;
>> + char *tmp = NULL;
>> + size_t nmbs = 0;
>> + size_t i;
>> + int ret = -1;
>> +
>> + /* For no reason there can be spaces */
>> + virSkipSpaces((const char **) &line);
>> +
>> + if (STRNEQLEN(line, "MB", 2))
>> + return 0;
>> +
>> + if (!resctrl || !resctrl->membw_info ||
>> + !resctrl->membw_info->min_bandwidth ||
>> + !resctrl->membw_info->bandwidth_granularity) {
>
> These checks seem out of place. If we get this far without !resctrl?
> Wow... And erroring out if !resctrl->membw_info even though
> virResctrlGetMemoryBandwidthInfo can succeed without it.
>
> As for the min_bandwidth and bandwidth_granularity checks - those would
> seem to be more appropriate in patch4 wouldn't they? when the values are
> read... If they're read as zero, then I assume that's bad ;-).
Yes. Such thing should not happen. Above check means find a resctrl
group with memory bandwidth information on a host without MBA support.
Although it is harmless, we can remove this if you like.
min_bandwidth and bandwidth_granularity checks should be part of this
patch I think. Those parameters are used for sanity check for memory
bandwidth allocation, though they are read in patch 4. Do you agree? :)
>
>> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> + _("Missing or inconsistent resctrl info for "
>> + "memory bandwidth allocation"));
>> + }
>> +
>> + if (!alloc->mem_bw) {
>> + if (VIR_ALLOC(alloc->mem_bw) < 0)
>> + return -1;
>> + }
>> +
>> + tmp = strchr(line, ':');
>> + if (!tmp)
>> + return 0;
>> + tmp++;
>> +
>> + mbs = virStringSplitCount(tmp, ";", 0, &nmbs);
>> + if (nmbs == 0)
>> + return 0;
>> +
>> + for (i = 0; i < nmbs; i++) {
>> + if (virResctrlAllocParseProcessMemoryBandwidth(resctrl, alloc, mbs[i]) < 0)
>> + goto cleanup;
>> + }
>> + ret = 0;
>> + cleanup:
>> + virStringListFree(mbs);
>> + return ret;
>> +}
>> +
>> +
>> static int
>> virResctrlAllocFormatCache(virResctrlAllocPtr alloc, virBufferPtr buf)
>> {
>> @@ -1013,6 +1305,11 @@ virResctrlAllocFormat(virResctrlAllocPtr alloc)
>> return NULL;
>> }
>>
>
> For schemata parse/format
>
>> + if (virResctrlAllocMemoryBandwidthFormat(alloc, &buf) < 0) {
>> + virBufferFreeAndReset(&buf);
>> + return NULL;
>> + }
>> +
>> return virBufferContentAndReset(&buf);
>> }
>>
>> @@ -1139,6 +1436,8 @@ virResctrlAllocParse(virResctrlInfoPtr resctrl,
>>
>> lines = virStringSplitCount(schemata, "\n", 0, &nlines);
>> for (i = 0; i < nlines; i++) {
>
> For schemata parse/format
>
>> + if (virResctrlAllocParseMemoryBandwidthLine(resctrl, alloc, lines[i]) < 0)
>> + goto cleanup;
>> if (virResctrlAllocParseCacheLine(resctrl, alloc, lines[i]) < 0)
>> goto cleanup;
>> }
>> @@ -1273,6 +1572,22 @@ virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
>> }
>> }
>>
>> + /* set default free memory bandwidth to 100%*/
>> + if (info->membw_info) {
>> + if (VIR_ALLOC(ret->mem_bw) < 0)
>> + goto error;
>> +
>> + if (VIR_EXPAND_N(ret->mem_bw->bandwidths, ret->mem_bw->nbandwidths,
>> + info->membw_info->max_id + 1) < 0)
>> + goto error;
>> +
>> + for (i = 0; i < ret->mem_bw->nbandwidths; i++) {
>> + if (VIR_ALLOC(ret->mem_bw->bandwidths[i]) < 0)
>> + goto error;
>> + *(ret->mem_bw->bandwidths[i]) = 100;
>> + }
>> + }
>> +
>> cleanup:
>> virBitmapFree(mask);
>> return ret;
>> @@ -1284,13 +1599,14 @@ virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
>>
>> /*
>> * This function creates an allocation that represents all unused parts of all
>> - * caches in the system. It uses virResctrlInfo for creating a new full
>> - * allocation with all bits set (using virResctrlAllocNewFromInfo()) and then
>> - * scans for all allocations under /sys/fs/resctrl and subtracts each one of
>> - * them from it. That way it can then return an allocation with only bit set
>> - * being those that are not mentioned in any other allocation. It is used for
>> - * two things, a) calculating the masks when creating allocations and b) from
>> - * tests.
>> + * caches and memory bandwidth in the system. It uses virResctrlInfo for
>> + * creating a new full allocation with all bits set (using
>> + * virResctrlAllocNewFromInfo()), memory bandwidth 100% and then scans
>> + * for all allocations under /sys/fs/resctrl and subtracts each one of them
>> + * from it. That way it can then return an allocation with only bit set
>> + * being those that are not mentioned in any other allocation for CAT and
>> + * available memory bandwidth for MBA. It is used for two things, a) calculating
>> + * the masks and bandwidth available when creating allocations and b) from tests.
>> */
>> virResctrlAllocPtr
>> virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
>> @@ -1336,6 +1652,7 @@ virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
>> goto error;
>> }
>>
>
> Need to think about this, but perhaps would be appropriate for schemata
> format/parse, but could be something for that 3rd patch out of this one.
>
>> + virResctrlMemoryBandwidthSubtract(ret, alloc);
>> virResctrlAllocSubtract(ret, alloc);
>> virObjectUnref(alloc);
>> alloc = NULL;
>> @@ -1526,8 +1843,8 @@ virResctrlAllocCopyMasks(virResctrlAllocPtr dst,
>>
>> /*
>> * This function is called when creating an allocation in the system. What it
>> - * does is that it gets all the unused bits using virResctrlAllocGetUnused() and
>> - * then tries to find a proper space for every requested allocation effectively
>> + * does is that it gets all the unused resources using virResctrlAllocGetUnused()
>> + * and then tries to find a proper space for every requested allocation effectively
>> * transforming `sizes` into `masks`.
>> */
>> static int
>> @@ -1547,6 +1864,9 @@ virResctrlAllocAssign(virResctrlInfoPtr resctrl,
>> if (!alloc_default)
>> goto cleanup;
>>
>
> Still need to come to grips with the 'best place' for this - perhaps a
> 3rd patch.
>
>> + if (virResctrlAllocMemoryBandwidth(resctrl, alloc, alloc_free) < 0)
>> + goto cleanup;
>> +
>> if (virResctrlAllocCopyMasks(alloc, alloc_default) < 0)
>> goto cleanup;
>>
>
> Let's see whether I can help you split these up a bit. There's a few
> things to answer from what I looked at. So far nothing is necessarily
> wrong, it's just the splitting of things that makes it easier to think
> about. The split you did has certainly helped thus far though.
>
> John
>
> I'll keep plugging away tomorrow.
Thanks for your review and help! :)
I just want to echo again. We will split this patch as:
1. introduce virResctrlAllocMemBW and its alloc and free part
2. introduce schemata file parsing part for MBA.
3. introduce functions used to calculate how much memory bandwidth is
used (that is find all groups already create)
4. introduce interface function used to set memory bandwidth
(virResctrlSetMemoryBandwidth).
5. introduce logic create resctrl group with memory bandwidth.
I may do some adjustment(merge or split above 5 patch) when try to split
this patch. Anyway, above 5 sub-patches are what we try to achive. How
do you think?
Bing
>
>
>> diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
>> index d657c06..d43fd31 100644
>> --- a/src/util/virresctrl.h
>> +++ b/src/util/virresctrl.h
>> @@ -73,6 +73,10 @@ typedef int virResctrlAllocForeachCacheCallback(unsigned int level,
>> unsigned long long size,
>> void *opaque);
>>
>> +typedef int virResctrlAllocForeachMemoryCallback(unsigned int id,
>> + unsigned int size,
>> + void *opaque);
>> +
>> virResctrlAllocPtr
>> virResctrlAllocNew(void);
>>
>> @@ -85,6 +89,15 @@ virResctrlAllocSetCacheSize(virResctrlAllocPtr alloc,
>> virCacheType type,
>> unsigned int cache,
>> unsigned long long size);
>> +int
>> +virResctrlAllocForeachMemory(virResctrlAllocPtr resctrl,
>> + virResctrlAllocForeachMemoryCallback cb,
>> + void *opaque);
>> +
>> +int
>> +virResctrlSetMemoryBandwidth(virResctrlAllocPtr alloc,
>> + unsigned int id,
>> + unsigned int memory_bandwidth);
>>
>> int
>> virResctrlAllocForeachCache(virResctrlAllocPtr alloc,
>>
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
On 07/26/2018 02:00 AM, bing.niu wrote:
>
>
> On 2018年07月26日 06:37, John Ferlan wrote:
>>
>>
>> On 07/18/2018 03:57 AM, bing.niu@intel.com wrote:
>>> From: Bing Niu <bing.niu@intel.com>
>>>
>>> Add memory bandwidth allocation support to virresctrl class.
>>> Introducing virResctrlAllocMemBW which is used for allocating memory
>>> bandwidth. Following virResctrlAllocPerType, it also employs a
>>> nested sparse array to indicate whether allocation is available for
>>> particular last level cache.
>>>
>>> Signed-off-by: Bing Niu <bing.niu@intel.com>
>>> ---
>>> src/util/virresctrl.c | 346
>>> ++++++++++++++++++++++++++++++++++++++++++++++++--
>>> src/util/virresctrl.h | 13 ++
>>> 2 files changed, 346 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
>>> index 06e2702..bec2afd 100644
>>> --- a/src/util/virresctrl.c
>>> +++ b/src/util/virresctrl.c
>>> @@ -36,9 +36,9 @@ VIR_LOG_INIT("util.virresctrl")
>>> /* Resctrl is short for Resource Control. It might be
>>> implemented for various
>>> - * resources, but at the time of this writing this is only supported
>>> for cache
>>> - * allocation technology (aka CAT). Hence the reson for leaving
>>> 'Cache' out of
>>> - * all the structure and function names for now (can be added later
>>> if needed.
>>> + * resources. Currently this supports cache allocation technology
>>> (aka CAT) and
>>> + * memory bandwidth allocation (aka MBA). More resources
>>> technologies may be
>>> + * added in feature.
>>
>> s/feature/the future/
>
> Will change.
>>
>>> */
>>>
> [....]
>>> virObject parent;
>>> virResctrlAllocPerLevelPtr *levels;
>>> size_t nlevels;
>>> + virResctrlAllocMemBWPtr mem_bw;
>>> +
>>> /* The identifier (any unique string for now) */
>>> char *id;
>>> /* libvirt-generated path in /sys/fs/resctrl for this particular
>>> @@ -275,6 +302,13 @@ virResctrlAllocDispose(void *obj)
>>> VIR_FREE(level);
>>> }
>>> + if (alloc->mem_bw) {
>>> + virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
>>> + for (i = 0; i < mem_bw->nbandwidths; i++)
>>> + VIR_FREE(mem_bw->bandwidths[i]);
>>> + }
>>> +
>>> + VIR_FREE(alloc->mem_bw);
>>
>> NIT: Could be inside the if condition.
>
> OK~
>>
>>> VIR_FREE(alloc->id);
>>> VIR_FREE(alloc->path);
>>> VIR_FREE(alloc->levels);
>>> @@ -697,6 +731,9 @@ virResctrlAllocIsEmpty(virResctrlAllocPtr alloc)
>>> if (!alloc)
>>> return true;
>>> + if (alloc->mem_bw)
>>> + return false;
>>> +
>>> for (i = 0; i < alloc->nlevels; i++) {
>>> virResctrlAllocPerLevelPtr a_level = alloc->levels[i];
>>> @@ -890,6 +927,27 @@ virResctrlAllocSetCacheSize(virResctrlAllocPtr
>>> alloc,
>>> int
>>> +virResctrlAllocForeachMemory(virResctrlAllocPtr alloc,
>>> + virResctrlAllocForeachMemoryCallback cb,
>>> + void *opaque)
>>> +{
>>> + size_t i = 0;
>>> +
>>> + if (!alloc)
>>> + return 0;
>>> +
>>> + if (alloc->mem_bw) {
>>> + virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
>>> + for (i = 0; i < mem_bw->nbandwidths; i++)
>>> + if (mem_bw->bandwidths[i])
>>> + cb(i, *mem_bw->bandwidths[i], opaque);
>>
>> @cb is an int function, but only ever used in what amounts to a void
>> function in patch8 (virDomainMemorytuneDefFormatHelper - although
>> defined as int, it only ever returns 0, so it could be void too).
>>
>> So either this changes to handle that or we change the *Callback to be
>> void. Do you have a preference?
>
> Let's add a return value testing in patch8.
> I think we should change cachetune virDomainCachetuneDefFormatHelper
> part also. Since it also doesn't check return value of
> virResctrlAllocForeachCache.
>
> IMO, Giving a return value of function is always a good practice. ;)
> It's also give some spaces for the future extension. If logic inside @cb
> became complex. :)
>
Yeah, typically those Foreach type function want to return something
such that we stop processing in the event of some failure, no need to
keep going and pile on the errors or have some unexpected success!
>>
>>> + }
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +
>>> +int
>>> virResctrlAllocForeachCache(virResctrlAllocPtr alloc,
>>> virResctrlAllocForeachCacheCallback cb,
>>> void *opaque)
>>> @@ -952,6 +1010,240 @@ virResctrlAllocGetID(virResctrlAllocPtr alloc)
>>> }
>>>
>>
>> The next hunk of changes has perhaps more to do with the functionality
>> of the code vs. the pure alloc/free of the structures. I think that
>> alloc/free needs to be split from Parse/Format - it'll make things
>> easier. Then there's Parse of XML vs Parse of the schemata for the MB:
>> lines which just jumbles things (in my mind ;-))
>>
>> This particular change may even be a 3rd patch... Keep reading ;-)
>
> Yes. I just realize put everything relating with ResctrlAlloc into one
> patch will give difficulties to the reviewer. I should split this patch.
> So how about we split this patch into:
>
> 1. introduce virResctrlAllocMemBW and its alloc and free part
> 2. introduce schemata file parsing part for MBA.
> 3. introduce functions used to calculate how much memory bandwidth is
> used (that is find all groups already create)
> 4. introduce interface function used to set memory bandwidth
> (virResctrlSetMemoryBandwidth).
> 5. introduce logic create resctrl group with memory bandwidth.
So I've done some of this already before reading your response - I'll
send what I have directly to you though. "So far" what I came up with
from just this patch:
util: Add MBA allocation to virresctrl
-> Just the allocation and dispose changes
util: Add MBA schemata parse and format methods
-> Split out virResctrlAllocMemoryBandwidthFormat and
virResctrlAllocParseMemoryBandwidthLine
-> Changed the order of virResctrlAllocParse to L* first and MB
second. It shouldn't matter, but it is the order it's formatted
util: Add support to calculate MBA utilization
-> Split out virResctrlMemoryBandwidthSubtract and
virResctrlAllocMemoryBandwidth
-> NB, for these two I moved them "closer" to their usage rather than
mixed up in the Parse/Format methods above.
-> I have a question for virResctrlMemoryBandwidthSubtract though. I
noted that virResctrlAllocSubtract gets called twice - once before the
while loop and once during the while loop. So should the same occur for
the BandwidthSubtract?
util: Introduce virResctrlAllocForeachMemory
-> Split out just the one API
-> Modify the code slightly to return 0 earlier if !alloc->mem_bw
-> Modify the for loop to return -1 if @cb fails
-> Add the API to src/libvirt_private.syms
util: Introduce virResctrlAllocSetMemoryBandwidth
-> All that was left over
-> Rename from virResctrlSetMemoryBandwidth
-> Move code to be "similar" to the virResctrlAllocSetCacheSize such
that it's before the AllocForeach API
-> Add the API to src/libvirt_private.syms
>>
>>> +static void
>>> +virResctrlMemoryBandwidthSubtract(virResctrlAllocPtr free,
>>> + virResctrlAllocPtr used)
>>> +{
>>> + size_t i;
>>> +
>>> + if (!used->mem_bw)
>>> + return;
>>> +
>>> + for (i = 0; i < used->mem_bw->nbandwidths; i++) {
>>> + if (used->mem_bw->bandwidths[i])
>>> + *(free->mem_bw->bandwidths[i]) -=
>>> *(used->mem_bw->bandwidths[i]);
>>> + }
>>> +}
>>> +
>>> +
>>
>> Might be nice to put some comments here. Since this is a backend of
>> sorts for the XML Parse API from patch8, let's wait to introduce it
>> there. That is it moves to patch8.
>
> I will add some comments here. And put into a separated patch.
> Since patch8 is used to parse XML and create resctrl with MBA
> information purely. So it's better we don't introduce this interface
> function together with XML parse.
> We can introduce interface first and wait our consumer patch8. :)
> How do you think?
Upon further reflection, introducing before is fine - you'll see in what
I send to you.
>>
>>> +int
>>> +virResctrlSetMemoryBandwidth(virResctrlAllocPtr alloc,
>>> + unsigned int id,
>>> + unsigned int memory_bandwidth)
>>> +{
>>> + virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
>>> +
>>> + if (!mem_bw) {
>>> + if (VIR_ALLOC(mem_bw) < 0)
>>> + return -1;
>>> + alloc->mem_bw = mem_bw;
>>> + }
>>> +
>>> + if (mem_bw->nbandwidths <= id &&
>>> + VIR_EXPAND_N(mem_bw->bandwidths, mem_bw->nbandwidths,
>>> + id - mem_bw->nbandwidths + 1) < 0)
>>> + return -1;
>>> +
>>> + if (mem_bw->bandwidths[id]) {
>>> + virReportError(VIR_ERR_XML_ERROR,
>>> + _("Memory Bandwidth already defined for node
>>> %u"),
>>> + id);
>>> + return -1;
>>> + }
>>> +
>>> + if (VIR_ALLOC(mem_bw->bandwidths[id]) < 0)
>>> + return -1;
>>> +
>>> + *(mem_bw->bandwidths[id]) = memory_bandwidth;
>>> + return 0;
>>> +}
>>> +
>>> +
>>
>> Seeing Format and Parse API's was confusing until I remembered that
>> these are for the schemata files. I think perhaps this section could
>> have been commented a bit more to indicate what it's for.
>
> Will add comments and put this into a dedicated patch with other
> schemata file parse and formating functions.
>>
>>> +static int
>>> +virResctrlAllocMemoryBandwidthFormat(virResctrlAllocPtr alloc,
>>> + virBufferPtr buf)
>>> +{
>>> + size_t i;
>>> +
>>> + if (!alloc->mem_bw)
>>> + return 0;
>>> +
>>> + virBufferAddLit(buf, "MB:");
>>> +
>>> + for (i = 0; i < alloc->mem_bw->nbandwidths; i++) {
>>> + if (alloc->mem_bw->bandwidths[i]) {
>>> + virBufferAsprintf(buf, "%zd=%u;", i,
>>> + *(alloc->mem_bw->bandwidths[i]));
>>> + }
>>> + }
>>> +
>>> + virBufferTrim(buf, ";", 1);
>>> + virBufferAddChar(buf, '\n');
>>> + if (virBufferCheckError(buf) < 0)
>>> + return -1;
>>> + else
>>> + return 0;
>>
>> return virBufferCheckError(buf);
>
> OK.
>>
>>> +}
>>> +
>>> +
>>
>> This API is the backend of the virResctrlAllocAssign called during the
>> qemuProcessResctrlCreate processing - so it may be a third patch that
>> can be extracted from this one. I need to look a bit harder though.
>> Similarly the *Subtract API may fall into this category.
>
> Yes~
>>
>>> +static int
>>> +virResctrlAllocMemoryBandwidth(virResctrlInfoPtr resctrl,
>>> + virResctrlAllocPtr alloc,
>>> + virResctrlAllocPtr free)
>>> +{
>>> + size_t i;
>>> + virResctrlAllocMemBWPtr mem_bw_alloc = alloc->mem_bw;
>>> + virResctrlAllocMemBWPtr mem_bw_free = free->mem_bw;
>>> + virResctrlInfoMemBWPtr mem_bw_info = resctrl->membw_info;
>>> +
>>> + if (!mem_bw_alloc)
>>> + return 0;
>>
>> So can we even have one of these if resctrl->membw_info == NULL? The
>> ->mem_bw is allocated in virResctrlSetMemoryBandwidth, which is only
>> defined, but not called. It will be called later in patch8 from a Parse
>> API I see.
>
> Yes. If host doesn't have MBA feature, however someone try to allocate
> memory bandwidth for a VM by mistake. In such a case, we will have
> resctrl->membw_info == NULL && ->mem_bw != NULL
> Then below check will throw error.
>>
>>> +
>>> + if (mem_bw_alloc && !mem_bw_info) {
>>
>> In virResctrlGetMemoryBandwidthInfo it's "OK" if bandwidth_gran is not
>> present - a VIR_INFO is generated, the resctrl->membw_info is left as
>> NULL, and we return 0.
>>
>> So failing here would seemingly be quite unexpected, wouldn't it?
>
> Yes! you are right. Maybe someone try to allocate memory bandwidth on a
> host without MBA support.
I didn't change any of the error scenarios in what I'm sending to you...
Just be sure to add a few more comments about what's being checked.
Since everything was packed into one patch as I'm reviewing I'm
thinking, is this the internal or external caller.
>>
>>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>>> + _("RDT Memory Bandwidth allocation "
>>> + "unsupported"));
>>> + return -1;
>>> + }
>>> +
>>> + for (i = 0; i < mem_bw_alloc->nbandwidths; i++) {
>>> + if (!mem_bw_alloc->bandwidths[i])
>>> + continue;
>>> +
>>> + if (*(mem_bw_alloc->bandwidths[i]) %
>>> mem_bw_info->bandwidth_granularity) {
>>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>>> + _("Memory Bandwidth allocation of size "
>>> + "%u is not divisible by granularity %u"),
>>> + *(mem_bw_alloc->bandwidths[i]),
>>> + mem_bw_info->bandwidth_granularity);
>>> + return -1;
>>> + }
>>> + if (*(mem_bw_alloc->bandwidths[i]) <
>>> mem_bw_info->min_bandwidth) {
>>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>>> + _("Memory Bandwidth allocation of size "
>>> + "%u is smaller than the minimum "
>>> + "allowed allocation %u"),
>>> + *(mem_bw_alloc->bandwidths[i]),
>>> + mem_bw_info->min_bandwidth);
>>> + return -1;
>>> + }
>>> + if (i > mem_bw_info->max_id) {
>>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>>> + _("bandwidth controller %zd not exist, "
>>
>> s/%zd/id %zd/
>>
>> s/not/does not/
>
> Will update.
>>
>>> + "max controller id %u"),
>>> + i, mem_bw_info->max_id);
>>> + return -1;
>>> + }
>>> + if (*(mem_bw_alloc->bandwidths[i]) >
>>> *(mem_bw_free->bandwidths[i])) {
>>> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>>> + _("Not enough room for allocation of %u%% "
>>> + "bandwidth on node %zd, available
>>> bandwidth %u%%"),
>>> + *(mem_bw_alloc->bandwidths[i]), i,
>>> + *(mem_bw_free->bandwidths[i]));
>>> + return -1;
>>> + }
>>> + }
>>
>> So this is just a Check or Validate type function? Should it be renamed
>> or should it actually do something.
>
> er.... from code, it seems just a validate function to grantee enough
> memory bandwidth available. We can think this function is allocating
> memory bandwidth from free_memory_bandwidth. But the
> free_memory_bandwidth is calculated by 100 - sum(memory bandwidth of
> existing resctrl group). So there is no any management structure used to
> track free memory bandwidth as other "allocate" or "free" functions, eg
> memory manange code.
> Maybe function name need to be adjust. I am not good at naming :(. Do
> you have a suggestion?
As another Red Hat libvirt engineer says, "Naming is hard". Problem is
there's a lot of opinions about the perfect name.
If this is a Check or Validation function, then let's use that in the
name. As it is now as virResctrlAllocMemoryBandwidth is just too
generic, using the virResctrlAlloc 'prefix', perhaps an adjustment to
virResctrlAllocCheckMemoryBandwidth is sufficient with of course a few
extra comments about it's purpose.
>>
>>> + return 0;
>>> +}
>>> +
>>> +
>>
>> More stuff for the schemata parse/format
>>
>>> +static int
>>> +virResctrlAllocParseProcessMemoryBandwidth(virResctrlInfoPtr resctrl,
>>> + virResctrlAllocPtr alloc,
>>> + char *mem_bw)
>>> +{
>>> + unsigned int bandwidth;
>>> + unsigned int id;
>>> + char *tmp = NULL;
>>> +
>>> + tmp = strchr(mem_bw, '=');
>>> + if (!tmp)
>>> + return 0;
>>> + *tmp = '\0';
>>> + tmp++;
>>> +
>>> + if (virStrToLong_uip(mem_bw, NULL, 10, &id) < 0) {
>>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>>> + _("Invalid node id %u "), id);
>>> + return -1;
>>> + }
>>> + if (virStrToLong_uip(tmp, NULL, 10, &bandwidth) < 0) {
>>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>>> + _("Invalid bandwidth %u"), bandwidth);
>>> + return -1;
>>> + }
>>> + if (bandwidth < resctrl->membw_info->min_bandwidth ||
>>> + id > resctrl->membw_info->max_id) {
>>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>>> + _("Missing or inconsistent resctrl info for "
>>> + "memory bandwidth node '%u'"), id);
>>> + return -1;
>>> + }
>>> + if (alloc->mem_bw->nbandwidths <= id &&
>>> + VIR_EXPAND_N(alloc->mem_bw->bandwidths,
>>> alloc->mem_bw->nbandwidths,
>>> + id - alloc->mem_bw->nbandwidths + 1) < 0) {
>>> + return -1;
>>> + }
>>> + if (!alloc->mem_bw->bandwidths[id]) {
>>> + if (VIR_ALLOC(alloc->mem_bw->bandwidths[id]) < 0)
>>> + return -1;
>>> + }
>>> +
>>> + *(alloc->mem_bw->bandwidths[id]) = bandwidth;
>>> + return 0;
>>> +}
>>> +
>>> +
>>
>> Ditto for schemata parse/format...
>>
>>> +static int
>>> +virResctrlAllocParseMemoryBandwidthLine(virResctrlInfoPtr resctrl,
>>> + virResctrlAllocPtr alloc,
>>> + char *line)
>>> +{
>>> + char **mbs = NULL;
>>> + char *tmp = NULL;
>>> + size_t nmbs = 0;
>>> + size_t i;
>>> + int ret = -1;
>>> +
>>> + /* For no reason there can be spaces */
>>> + virSkipSpaces((const char **) &line);
>>> +
>>> + if (STRNEQLEN(line, "MB", 2))
>>> + return 0;
>>> +
>>> + if (!resctrl || !resctrl->membw_info ||
>>> + !resctrl->membw_info->min_bandwidth ||
>>> + !resctrl->membw_info->bandwidth_granularity) {
>>
>> These checks seem out of place. If we get this far without !resctrl?
>> Wow... And erroring out if !resctrl->membw_info even though
>> virResctrlGetMemoryBandwidthInfo can succeed without it.
>>
>> As for the min_bandwidth and bandwidth_granularity checks - those would
>> seem to be more appropriate in patch4 wouldn't they? when the values are
>> read... If they're read as zero, then I assume that's bad ;-).
>
> Yes. Such thing should not happen. Above check means find a resctrl
> group with memory bandwidth information on a host without MBA support.
> Although it is harmless, we can remove this if you like.
> min_bandwidth and bandwidth_granularity checks should be part of this
> patch I think. Those parameters are used for sanity check for memory
> bandwidth allocation, though they are read in patch 4. Do you agree? :)
Yeah, I think parameter value validation is fine at read time. I know
there's other code that does that. I didn't check if the CAT code did
that though.
I think in the long term you need to decide - if I found an MB: line in
the schemata file, then can it even be possible that membw_info is NULL.
I suppose it is an error - again with all the code being added together
in one patch, it was "confusing" to consider the Parse/Format of the
schemata file. I suppose in the back of my mind I had he parse/format of
the XML file...
>>
>>> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>>> + _("Missing or inconsistent resctrl info for "
>>> + "memory bandwidth allocation"));
>>> + }
>>> +
>>> + if (!alloc->mem_bw) {
>>> + if (VIR_ALLOC(alloc->mem_bw) < 0)
>>> + return -1;
>>> + }
>>> +
>>> + tmp = strchr(line, ':');
>>> + if (!tmp)
>>> + return 0;
>>> + tmp++;
>>> +
>>> + mbs = virStringSplitCount(tmp, ";", 0, &nmbs);
>>> + if (nmbs == 0)
>>> + return 0;
>>> +
>>> + for (i = 0; i < nmbs; i++) {
>>> + if (virResctrlAllocParseProcessMemoryBandwidth(resctrl,
>>> alloc, mbs[i]) < 0)
>>> + goto cleanup;
>>> + }
>>> + ret = 0;
>>> + cleanup:
>>> + virStringListFree(mbs);
>>> + return ret;
>>> +}
>>> +
>>> +
>>> static int
>>> virResctrlAllocFormatCache(virResctrlAllocPtr alloc, virBufferPtr buf)
>>> {
>>> @@ -1013,6 +1305,11 @@ virResctrlAllocFormat(virResctrlAllocPtr alloc)
>>> return NULL;
>>> }
>>>
>>
>> For schemata parse/format
>>
>>> + if (virResctrlAllocMemoryBandwidthFormat(alloc, &buf) < 0) {
>>> + virBufferFreeAndReset(&buf);
>>> + return NULL;
>>> + }
>>> +
>>> return virBufferContentAndReset(&buf);
>>> }
>>> @@ -1139,6 +1436,8 @@ virResctrlAllocParse(virResctrlInfoPtr resctrl,
>>> lines = virStringSplitCount(schemata, "\n", 0, &nlines);
>>> for (i = 0; i < nlines; i++) {
>>
>> For schemata parse/format
>>
>>> + if (virResctrlAllocParseMemoryBandwidthLine(resctrl, alloc,
>>> lines[i]) < 0)
>>> + goto cleanup;
>>> if (virResctrlAllocParseCacheLine(resctrl, alloc, lines[i])
>>> < 0)
>>> goto cleanup;
>>> }
>>> @@ -1273,6 +1572,22 @@ virResctrlAllocNewFromInfo(virResctrlInfoPtr
>>> info)
>>> }
>>> }
>>> + /* set default free memory bandwidth to 100%*/
>>> + if (info->membw_info) {
>>> + if (VIR_ALLOC(ret->mem_bw) < 0)
>>> + goto error;
>>> +
>>> + if (VIR_EXPAND_N(ret->mem_bw->bandwidths,
>>> ret->mem_bw->nbandwidths,
>>> + info->membw_info->max_id + 1) < 0)
>>> + goto error;
>>> +
>>> + for (i = 0; i < ret->mem_bw->nbandwidths; i++) {
>>> + if (VIR_ALLOC(ret->mem_bw->bandwidths[i]) < 0)
>>> + goto error;
>>> + *(ret->mem_bw->bandwidths[i]) = 100;
>>> + }
>>> + }
>>> +
>>> cleanup:
>>> virBitmapFree(mask);
>>> return ret;
>>> @@ -1284,13 +1599,14 @@ virResctrlAllocNewFromInfo(virResctrlInfoPtr
>>> info)
>>> /*
>>> * This function creates an allocation that represents all unused
>>> parts of all
>>> - * caches in the system. It uses virResctrlInfo for creating a new
>>> full
>>> - * allocation with all bits set (using virResctrlAllocNewFromInfo())
>>> and then
>>> - * scans for all allocations under /sys/fs/resctrl and subtracts
>>> each one of
>>> - * them from it. That way it can then return an allocation with
>>> only bit set
>>> - * being those that are not mentioned in any other allocation. It
>>> is used for
>>> - * two things, a) calculating the masks when creating allocations
>>> and b) from
>>> - * tests.
>>> + * caches and memory bandwidth in the system. It uses
>>> virResctrlInfo for
>>> + * creating a new full allocation with all bits set (using
>>> + * virResctrlAllocNewFromInfo()), memory bandwidth 100% and then scans
>>> + * for all allocations under /sys/fs/resctrl and subtracts each one
>>> of them
>>> + * from it. That way it can then return an allocation with only bit
>>> set
>>> + * being those that are not mentioned in any other allocation for
>>> CAT and
>>> + * available memory bandwidth for MBA. It is used for two things,
>>> a) calculating
>>> + * the masks and bandwidth available when creating allocations and
>>> b) from tests.
>>> */
>>> virResctrlAllocPtr
>>> virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
>>> @@ -1336,6 +1652,7 @@ virResctrlAllocGetUnused(virResctrlInfoPtr
>>> resctrl)
>>> goto error;
>>> }
>>>
>>
>> Need to think about this, but perhaps would be appropriate for schemata
>> format/parse, but could be something for that 3rd patch out of this one.
>>
>>> + virResctrlMemoryBandwidthSubtract(ret, alloc);
>>> virResctrlAllocSubtract(ret, alloc);
>>> virObjectUnref(alloc);
>>> alloc = NULL;
>>> @@ -1526,8 +1843,8 @@ virResctrlAllocCopyMasks(virResctrlAllocPtr dst,
>>> /*
>>> * This function is called when creating an allocation in the
>>> system. What it
>>> - * does is that it gets all the unused bits using
>>> virResctrlAllocGetUnused() and
>>> - * then tries to find a proper space for every requested allocation
>>> effectively
>>> + * does is that it gets all the unused resources using
>>> virResctrlAllocGetUnused()
>>> + * and then tries to find a proper space for every requested
>>> allocation effectively
>>> * transforming `sizes` into `masks`.
>>> */
>>> static int
>>> @@ -1547,6 +1864,9 @@ virResctrlAllocAssign(virResctrlInfoPtr resctrl,
>>> if (!alloc_default)
>>> goto cleanup;
>>>
>>
>> Still need to come to grips with the 'best place' for this - perhaps a
>> 3rd patch.
>>
>>> + if (virResctrlAllocMemoryBandwidth(resctrl, alloc, alloc_free) < 0)
>>> + goto cleanup;
>>> +
>>> if (virResctrlAllocCopyMasks(alloc, alloc_default) < 0)
>>> goto cleanup;
>>>
>>
>> Let's see whether I can help you split these up a bit. There's a few
>> things to answer from what I looked at. So far nothing is necessarily
>> wrong, it's just the splitting of things that makes it easier to think
>> about. The split you did has certainly helped thus far though.
>>
>> John
>>
>> I'll keep plugging away tomorrow.
>
> Thanks for your review and help! :)
> I just want to echo again. We will split this patch as:
>
> 1. introduce virResctrlAllocMemBW and its alloc and free part
> 2. introduce schemata file parsing part for MBA.
> 3. introduce functions used to calculate how much memory bandwidth is
> used (that is find all groups already create)
> 4. introduce interface function used to set memory bandwidth
> (virResctrlSetMemoryBandwidth).
> 5. introduce logic create resctrl group with memory bandwidth.
>
> I may do some adjustment(merge or split above 5 patch) when try to split
> this patch. Anyway, above 5 sub-patches are what we try to achive. How
> do you think?
>
> Bing
I'll send you what I have "so far" shortly. There are some formatting
adjustments included, but for the most part it's taking this one patch
and splitting it before the subsequent rename patch. The only other
adjustment was to use the "new" name for the SetMemoryBandwidth
John
>>
>>
>>> diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
>>> index d657c06..d43fd31 100644
>>> --- a/src/util/virresctrl.h
>>> +++ b/src/util/virresctrl.h
>>> @@ -73,6 +73,10 @@ typedef int
>>> virResctrlAllocForeachCacheCallback(unsigned int level,
>>> unsigned long long
>>> size,
>>> void *opaque);
>>> +typedef int virResctrlAllocForeachMemoryCallback(unsigned int id,
>>> + unsigned int size,
>>> + void *opaque);
>>> +
>>> virResctrlAllocPtr
>>> virResctrlAllocNew(void);
>>> @@ -85,6 +89,15 @@ virResctrlAllocSetCacheSize(virResctrlAllocPtr
>>> alloc,
>>> virCacheType type,
>>> unsigned int cache,
>>> unsigned long long size);
>>> +int
>>> +virResctrlAllocForeachMemory(virResctrlAllocPtr resctrl,
>>> + virResctrlAllocForeachMemoryCallback cb,
>>> + void *opaque);
>>> +
>>> +int
>>> +virResctrlSetMemoryBandwidth(virResctrlAllocPtr alloc,
>>> + unsigned int id,
>>> + unsigned int memory_bandwidth);
>>> int
>>> virResctrlAllocForeachCache(virResctrlAllocPtr alloc,
>>>
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
© 2016 - 2026 Red Hat, Inc.