Expose memory scan/reclaim information to the host side via virtio
balloon device.
Now we have a metric to analyze the memory performance:
y: counter increases
n: counter does not change
h: the rate of counter change is high
l: the rate of counter change is low
OOM: VIRTIO_BALLOON_S_OOM_KILL
STALL: VIRTIO_BALLOON_S_ALLOC_STALL
ASCAN: VIRTIO_BALLOON_S_SCAN_ASYNC
DSCAN: VIRTIO_BALLOON_S_SCAN_DIRECT
ARCLM: VIRTIO_BALLOON_S_RECLAIM_ASYNC
DRCLM: VIRTIO_BALLOON_S_RECLAIM_DIRECT
- OOM[y], STALL[*], ASCAN[*], DSCAN[*], ARCLM[*], DRCLM[*]:
the guest runs under really critical memory pressure
- OOM[n], STALL[h], ASCAN[*], DSCAN[l], ARCLM[*], DRCLM[l]:
the memory allocation stalls due to cgroup, not the global memory
pressure.
- OOM[n], STALL[h], ASCAN[*], DSCAN[h], ARCLM[*], DRCLM[h]:
the memory allocation stalls due to global memory pressure. The
performance gets hurt a lot. A high ratio between DRCLM/DSCAN shows
quite effective memory reclaiming.
- OOM[n], STALL[h], ASCAN[*], DSCAN[h], ARCLM[*], DRCLM[l]:
the memory allocation stalls due to global memory pressure.
When the ratio between DRCLM/DSCAN gets low, the guest OS is thrashing
heavily; in serious cases this leads to poor performance and difficult
troubleshooting. E.g., sshd may block on memory allocation when
accepting new connections, so a user can't log in to a VM via ssh.
- OOM[n], STALL[n], ASCAN[h], DSCAN[n], ARCLM[l], DRCLM[n]:
the low ratio between ARCLM/ASCAN shows that the guest tries to
reclaim more memory, but it can't. Once more memory is required in
the future, it will struggle to reclaim memory.
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
drivers/virtio/virtio_balloon.c | 9 +++++++++
include/uapi/linux/virtio_balloon.h | 12 ++++++++++--
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 4b9c9569f6e5..7b86514e99d4 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -372,6 +372,15 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb)
stall += events[ALLOCSTALL_MOVABLE];
update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall);
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_SCAN_ASYNC,
+ pages_to_bytes(events[PGSCAN_KSWAPD]));
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_SCAN_DIRECT,
+ pages_to_bytes(events[PGSCAN_DIRECT]));
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_RECLAIM_ASYNC,
+ pages_to_bytes(events[PGSTEAL_KSWAPD]));
+ update_stat(vb, idx++, VIRTIO_BALLOON_S_RECLAIM_DIRECT,
+ pages_to_bytes(events[PGSTEAL_DIRECT]));
+
return idx;
}
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 13d0c32ba27c..0875a9cccb01 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -73,7 +73,11 @@ struct virtio_balloon_config {
#define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */
#define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */
#define VIRTIO_BALLOON_S_ALLOC_STALL 11 /* Stall count of memory allocatoin */
-#define VIRTIO_BALLOON_S_NR 12
+#define VIRTIO_BALLOON_S_SCAN_ASYNC 12 /* Amount of memory scanned asynchronously */
+#define VIRTIO_BALLOON_S_SCAN_DIRECT 13 /* Amount of memory scanned directly */
+#define VIRTIO_BALLOON_S_RECLAIM_ASYNC 14 /* Amount of memory reclaimed asynchronously */
+#define VIRTIO_BALLOON_S_RECLAIM_DIRECT 15 /* Amount of memory reclaimed directly */
+#define VIRTIO_BALLOON_S_NR 16
#define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \
VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \
@@ -87,7 +91,11 @@ struct virtio_balloon_config {
VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \
VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \
VIRTIO_BALLOON_S_NAMES_prefix "oom-kill", \
- VIRTIO_BALLOON_S_NAMES_prefix "alloc-stall" \
+ VIRTIO_BALLOON_S_NAMES_prefix "alloc-stall", \
+ VIRTIO_BALLOON_S_NAMES_prefix "scan-async", \
+ VIRTIO_BALLOON_S_NAMES_prefix "scan-direct", \
+ VIRTIO_BALLOON_S_NAMES_prefix "reclaim-async", \
+ VIRTIO_BALLOON_S_NAMES_prefix "reclaim-direct" \
}
#define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("")
--
2.34.1
On 15.04.24 10:41, zhenwei pi wrote:
> Expose memory scan/reclaim information to the host side via virtio
> balloon device.
>
> Now we have a metric to analyze the memory performance:
>
> y: counter increases
> n: counter does not change
> h: the rate of counter change is high
> l: the rate of counter change is low
>
> OOM: VIRTIO_BALLOON_S_OOM_KILL
> STALL: VIRTIO_BALLOON_S_ALLOC_STALL
> ASCAN: VIRTIO_BALLOON_S_SCAN_ASYNC
> DSCAN: VIRTIO_BALLOON_S_SCAN_DIRECT
> ARCLM: VIRTIO_BALLOON_S_RECLAIM_ASYNC
> DRCLM: VIRTIO_BALLOON_S_RECLAIM_DIRECT
>
> - OOM[y], STALL[*], ASCAN[*], DSCAN[*], ARCLM[*], DRCLM[*]:
> the guest runs under really critical memory pressure
>
> - OOM[n], STALL[h], ASCAN[*], DSCAN[l], ARCLM[*], DRCLM[l]:
> the memory allocation stalls due to cgroup, not the global memory
> pressure.
>
> - OOM[n], STALL[h], ASCAN[*], DSCAN[h], ARCLM[*], DRCLM[h]:
> the memory allocation stalls due to global memory pressure. The
> performance gets hurt a lot. A high ratio between DRCLM/DSCAN shows
> quite effective memory reclaiming.
>
> - OOM[n], STALL[h], ASCAN[*], DSCAN[h], ARCLM[*], DRCLM[l]:
> the memory allocation stalls due to global memory pressure.
> the ratio between DRCLM/DSCAN gets low, the guest OS is thrashing
> heavily, the serious case leads poor performance and difficult
> trouble shooting. Ex, sshd may block on memory allocation when
> accepting new connections, a user can't login a VM by ssh command.
>
> - OOM[n], STALL[n], ASCAN[h], DSCAN[n], ARCLM[l], DRCLM[n]:
> the low ratio between ARCLM/ASCAN shows that the guest tries to
> reclaim more memory, but it can't. Once more memory is required in
> future, it will struggle to reclaim memory.
>
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
> drivers/virtio/virtio_balloon.c | 9 +++++++++
> include/uapi/linux/virtio_balloon.h | 12 ++++++++++--
> 2 files changed, 19 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 4b9c9569f6e5..7b86514e99d4 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -372,6 +372,15 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb)
> stall += events[ALLOCSTALL_MOVABLE];
> update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall);
>
> + update_stat(vb, idx++, VIRTIO_BALLOON_S_SCAN_ASYNC,
> + pages_to_bytes(events[PGSCAN_KSWAPD]));
> + update_stat(vb, idx++, VIRTIO_BALLOON_S_SCAN_DIRECT,
> + pages_to_bytes(events[PGSCAN_DIRECT]));
> + update_stat(vb, idx++, VIRTIO_BALLOON_S_RECLAIM_ASYNC,
> + pages_to_bytes(events[PGSTEAL_KSWAPD]));
> + update_stat(vb, idx++, VIRTIO_BALLOON_S_RECLAIM_DIRECT,
> + pages_to_bytes(events[PGSTEAL_DIRECT]));
> +
> return idx;
> }
>
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index 13d0c32ba27c..0875a9cccb01 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -73,7 +73,11 @@ struct virtio_balloon_config {
> #define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */
> #define VIRTIO_BALLOON_S_OOM_KILL 10 /* OOM killer invocations */
> #define VIRTIO_BALLOON_S_ALLOC_STALL 11 /* Stall count of memory allocatoin */
> -#define VIRTIO_BALLOON_S_NR 12
> +#define VIRTIO_BALLOON_S_SCAN_ASYNC 12 /* Amount of memory scanned asynchronously */
> +#define VIRTIO_BALLOON_S_SCAN_DIRECT 13 /* Amount of memory scanned directly */
> +#define VIRTIO_BALLOON_S_RECLAIM_ASYNC 14 /* Amount of memory reclaimed asynchronously */
> +#define VIRTIO_BALLOON_S_RECLAIM_DIRECT 15 /* Amount of memory reclaimed directly */
Should these be the other way around:
ASYNC_SCAN
...
ASYNC_RECLAIM
so we can get ...
> +#define VIRTIO_BALLOON_S_NR 16
>
> #define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \
> VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \
> @@ -87,7 +91,11 @@ struct virtio_balloon_config {
> VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \
> VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \
> VIRTIO_BALLOON_S_NAMES_prefix "oom-kill", \
> - VIRTIO_BALLOON_S_NAMES_prefix "alloc-stall" \
> + VIRTIO_BALLOON_S_NAMES_prefix "alloc-stall", \
> + VIRTIO_BALLOON_S_NAMES_prefix "scan-async", \
> + VIRTIO_BALLOON_S_NAMES_prefix "scan-direct", \
> + VIRTIO_BALLOON_S_NAMES_prefix "reclaim-async", \
> + VIRTIO_BALLOON_S_NAMES_prefix "reclaim-direct" \
...
"async-scans", "async-reclaims" ...
--
Cheers,
David / dhildenb
© 2016 - 2026 Red Hat, Inc.