1
Currently, passing mem-lock=on to QEMU causes memory usage to grow by
1
Currently, passing mem-lock=on to QEMU causes memory usage to grow by
2
huge amounts:
2
huge amounts:
3
3
4
no memlock:
4
no memlock:
5
$ qemu-system-x86_64 -overcommit mem-lock=off
5
$ ./qemu-system-x86_64 -overcommit mem-lock=off
6
$ ps -p $(pidof ./qemu-system-x86_64) -o rss=
6
$ ps -p $(pidof ./qemu-system-x86_64) -o rss=
7
45652
7
45652
8
8
9
$ ./qemu-system-x86_64 -overcommit mem-lock=off -enable-kvm
9
$ ./qemu-system-x86_64 -overcommit mem-lock=off -enable-kvm
10
$ ps -p $(pidof ./qemu-system-x86_64) -o rss=
10
$ ps -p $(pidof ./qemu-system-x86_64) -o rss=
11
39756
11
39756
12
12
13
memlock:
13
memlock:
14
$ qemu-system-x86_64 -overcommit mem-lock=on
14
$ ./qemu-system-x86_64 -overcommit mem-lock=on
15
$ ps -p $(pidof ./qemu-system-x86_64) -o rss=
15
$ ps -p $(pidof ./qemu-system-x86_64) -o rss=
16
1309876
16
1309876
17
17
18
$ ./qemu-system-x86_64 -overcommit mem-lock=on -enable-kvm
18
$ ./qemu-system-x86_64 -overcommit mem-lock=on -enable-kvm
19
$ ps -p $(pidof ./qemu-system-x86_64) -o rss=
19
$ ps -p $(pidof ./qemu-system-x86_64) -o rss=
...
...
30
active.
30
active.
31
31
32
mem-lock=on helps against this (given compact_unevictable_allowed is 0),
32
mem-lock=on helps against this (given compact_unevictable_allowed is 0),
33
but the memory overhead it introduces is an undesirable side effect,
33
but the memory overhead it introduces is an undesirable side effect,
34
which we can completely avoid by passing MCL_ONFAULT to mlockall, which
34
which we can completely avoid by passing MCL_ONFAULT to mlockall, which
35
is what this series makes possible with a new command line option called
35
is what this series makes possible with a new option for mem-lock called
36
mem-lock-onfault.
36
on-fault.
37
37
38
memlock-onfault:
38
memlock=on-fault:
39
$ qemu-system-x86_64 -overcommit mem-lock-onfault=on
39
$ ./qemu-system-x86_64 -overcommit mem-lock=on-fault
40
$ ps -p $(pidof ./qemu-system-x86_64) -o rss=
40
$ ps -p $(pidof ./qemu-system-x86_64) -o rss=
41
54004
41
54004
42
42
43
$ ./qemu-system-x86_64 -overcommit mem-lock-onfault=on -enable-kvm
43
$ ./qemu-system-x86_64 -overcommit mem-lock=on-fault -enable-kvm
44
$ ps -p $(pidof ./qemu-system-x86_64) -o rss=
44
$ ps -p $(pidof ./qemu-system-x86_64) -o rss=
45
47772
45
47772
46
46
47
You may notice the memory usage is still slightly higher, in this case
47
You may notice the memory usage is still slightly higher, in this case
48
by a few megabytes over the mem-lock=off case. I was able to trace this
48
by a few megabytes over the mem-lock=off case. I was able to trace this
49
down to a bug in the Linux kernel with MCL_ONFAULT not being honored for
49
down to a bug in the Linux kernel with MCL_ONFAULT not being honored for
50
the early process heap (with brk(2) etc.) so it is still write-faulted in
50
the early process heap (with brk(2) etc.) so it is still write-faulted in
51
this case, but it's still way less than it was with plain mem-lock=on.
51
this case, but it's still way less than it was with plain mem-lock=on.
52
52
53
Changes since v1:
54
- Don't make a separate mem-lock-onfault option; add an on-fault value to mem-lock instead
55
53
Daniil Tatianin (2):
56
Daniil Tatianin (2):
54
os: add an ability to lock memory on_fault
57
os: add an ability to lock memory on_fault
55
overcommit: introduce mem-lock-onfault
58
overcommit: introduce mem-lock=on-fault
56
59
57
include/sysemu/os-posix.h | 2 +-
60
include/sysemu/os-posix.h | 2 +-
58
include/sysemu/os-win32.h | 3 ++-
61
include/sysemu/os-win32.h | 3 ++-
59
include/sysemu/sysemu.h | 1 +
62
include/sysemu/sysemu.h | 1 +
60
migration/postcopy-ram.c | 4 ++--
63
migration/postcopy-ram.c | 4 ++--
61
os-posix.c | 10 ++++++++--
64
os-posix.c | 10 +++++++--
62
qemu-options.hx | 13 ++++++++++---
65
qemu-options.hx | 14 +++++++-----
63
system/globals.c | 1 +
66
system/globals.c | 1 +
64
system/vl.c | 18 ++++++++++++++++--
67
system/vl.c | 46 +++++++++++++++++++++++++++++++--------
65
8 files changed, 41 insertions(+), 11 deletions(-)
68
8 files changed, 61 insertions(+), 20 deletions(-)
66
69
67
--
70
--
68
2.34.1
71
2.34.1
diff view generated by jsdifflib
1
This will be used in the following commits to make it possible to only
1
This will be used in the following commits to make it possible to only
2
lock memory on fault instead of right away.
2
lock memory on fault instead of right away.
3
3
4
Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
4
Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
5
---
5
---
6
include/sysemu/os-posix.h | 2 +-
6
include/sysemu/os-posix.h | 2 +-
7
include/sysemu/os-win32.h | 3 ++-
7
include/sysemu/os-win32.h | 3 ++-
8
migration/postcopy-ram.c | 2 +-
8
migration/postcopy-ram.c | 2 +-
9
os-posix.c | 10 ++++++++--
9
os-posix.c | 10 ++++++++--
10
system/vl.c | 2 +-
10
system/vl.c | 2 +-
11
5 files changed, 13 insertions(+), 6 deletions(-)
11
5 files changed, 13 insertions(+), 6 deletions(-)
12
12
13
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
13
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
14
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/sysemu/os-posix.h
15
--- a/include/sysemu/os-posix.h
16
+++ b/include/sysemu/os-posix.h
16
+++ b/include/sysemu/os-posix.h
17
@@ -XXX,XX +XXX,XX @@ bool os_set_runas(const char *user_id);
17
@@ -XXX,XX +XXX,XX @@ bool os_set_runas(const char *user_id);
18
void os_set_chroot(const char *path);
18
void os_set_chroot(const char *path);
19
void os_setup_limits(void);
19
void os_setup_limits(void);
20
void os_setup_post(void);
20
void os_setup_post(void);
21
-int os_mlock(void);
21
-int os_mlock(void);
22
+int os_mlock(bool on_fault);
22
+int os_mlock(bool on_fault);
23
23
24
/**
24
/**
25
* qemu_alloc_stack:
25
* qemu_alloc_stack:
26
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
26
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
27
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
28
--- a/include/sysemu/os-win32.h
28
--- a/include/sysemu/os-win32.h
29
+++ b/include/sysemu/os-win32.h
29
+++ b/include/sysemu/os-win32.h
30
@@ -XXX,XX +XXX,XX @@ static inline bool is_daemonized(void)
30
@@ -XXX,XX +XXX,XX @@ static inline bool is_daemonized(void)
31
return false;
31
return false;
32
}
32
}
33
33
34
-static inline int os_mlock(void)
34
-static inline int os_mlock(void)
35
+static inline int os_mlock(bool on_fault)
35
+static inline int os_mlock(bool on_fault)
36
{
36
{
37
+ (void)on_fault;
37
+ (void)on_fault;
38
return -ENOSYS;
38
return -ENOSYS;
39
}
39
}
40
40
41
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
41
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
42
index XXXXXXX..XXXXXXX 100644
42
index XXXXXXX..XXXXXXX 100644
43
--- a/migration/postcopy-ram.c
43
--- a/migration/postcopy-ram.c
44
+++ b/migration/postcopy-ram.c
44
+++ b/migration/postcopy-ram.c
45
@@ -XXX,XX +XXX,XX @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
45
@@ -XXX,XX +XXX,XX @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
46
}
46
}
47
47
48
if (enable_mlock) {
48
if (enable_mlock) {
49
- if (os_mlock() < 0) {
49
- if (os_mlock() < 0) {
50
+ if (os_mlock(false) < 0) {
50
+ if (os_mlock(false) < 0) {
51
error_report("mlock: %s", strerror(errno));
51
error_report("mlock: %s", strerror(errno));
52
/*
52
/*
53
* It doesn't feel right to fail at this point, we have a valid
53
* It doesn't feel right to fail at this point, we have a valid
54
diff --git a/os-posix.c b/os-posix.c
54
diff --git a/os-posix.c b/os-posix.c
55
index XXXXXXX..XXXXXXX 100644
55
index XXXXXXX..XXXXXXX 100644
56
--- a/os-posix.c
56
--- a/os-posix.c
57
+++ b/os-posix.c
57
+++ b/os-posix.c
58
@@ -XXX,XX +XXX,XX @@ void os_set_line_buffering(void)
58
@@ -XXX,XX +XXX,XX @@ void os_set_line_buffering(void)
59
setvbuf(stdout, NULL, _IOLBF, 0);
59
setvbuf(stdout, NULL, _IOLBF, 0);
60
}
60
}
61
61
62
-int os_mlock(void)
62
-int os_mlock(void)
63
+int os_mlock(bool on_fault)
63
+int os_mlock(bool on_fault)
64
{
64
{
65
#ifdef HAVE_MLOCKALL
65
#ifdef HAVE_MLOCKALL
66
int ret = 0;
66
int ret = 0;
67
+ int flags = MCL_CURRENT | MCL_FUTURE;
67
+ int flags = MCL_CURRENT | MCL_FUTURE;
68
68
69
- ret = mlockall(MCL_CURRENT | MCL_FUTURE);
69
- ret = mlockall(MCL_CURRENT | MCL_FUTURE);
70
+ if (on_fault) {
70
+ if (on_fault) {
71
+ flags |= MCL_ONFAULT;
71
+ flags |= MCL_ONFAULT;
72
+ }
72
+ }
73
+
73
+
74
+ ret = mlockall(flags);
74
+ ret = mlockall(flags);
75
if (ret < 0) {
75
if (ret < 0) {
76
error_report("mlockall: %s", strerror(errno));
76
error_report("mlockall: %s", strerror(errno));
77
}
77
}
78
78
79
return ret;
79
return ret;
80
#else
80
#else
81
+ (void)on_fault;
81
+ (void)on_fault;
82
return -ENOSYS;
82
return -ENOSYS;
83
#endif
83
#endif
84
}
84
}
85
diff --git a/system/vl.c b/system/vl.c
85
diff --git a/system/vl.c b/system/vl.c
86
index XXXXXXX..XXXXXXX 100644
86
index XXXXXXX..XXXXXXX 100644
87
--- a/system/vl.c
87
--- a/system/vl.c
88
+++ b/system/vl.c
88
+++ b/system/vl.c
89
@@ -XXX,XX +XXX,XX @@ static QemuOptsList qemu_run_with_opts = {
89
@@ -XXX,XX +XXX,XX @@ static QemuOptsList qemu_run_with_opts = {
90
static void realtime_init(void)
90
static void realtime_init(void)
91
{
91
{
92
if (enable_mlock) {
92
if (enable_mlock) {
93
- if (os_mlock() < 0) {
93
- if (os_mlock() < 0) {
94
+ if (os_mlock(false) < 0) {
94
+ if (os_mlock(false) < 0) {
95
error_report("locking memory failed");
95
error_report("locking memory failed");
96
exit(1);
96
exit(1);
97
}
97
}
98
--
98
--
99
2.34.1
99
2.34.1
diff view generated by jsdifflib
...
...
7
7
8
Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
8
Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
9
---
9
---
10
include/sysemu/sysemu.h | 1 +
10
include/sysemu/sysemu.h | 1 +
11
migration/postcopy-ram.c | 4 ++--
11
migration/postcopy-ram.c | 4 ++--
12
qemu-options.hx | 13 ++++++++++---
12
qemu-options.hx | 14 +++++++-----
13
system/globals.c | 1 +
13
system/globals.c | 1 +
14
system/vl.c | 18 ++++++++++++++++--
14
system/vl.c | 46 ++++++++++++++++++++++++++++++++--------
15
5 files changed, 30 insertions(+), 7 deletions(-)
15
5 files changed, 50 insertions(+), 16 deletions(-)
16
16
17
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
17
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
18
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/sysemu/sysemu.h
19
--- a/include/sysemu/sysemu.h
20
+++ b/include/sysemu/sysemu.h
20
+++ b/include/sysemu/sysemu.h
...
...
48
@@ -XXX,XX +XXX,XX @@ SRST
48
@@ -XXX,XX +XXX,XX @@ SRST
49
ERST
49
ERST
50
50
51
DEF("overcommit", HAS_ARG, QEMU_OPTION_overcommit,
51
DEF("overcommit", HAS_ARG, QEMU_OPTION_overcommit,
52
- "-overcommit [mem-lock=on|off][cpu-pm=on|off]\n"
52
- "-overcommit [mem-lock=on|off][cpu-pm=on|off]\n"
53
+ "-overcommit [mem-lock=on|off][mem-lock-onfault=on|off][cpu-pm=on|off]\n"
53
+ "-overcommit [mem-lock=on|off|on-fault][cpu-pm=on|off]\n"
54
" run qemu with overcommit hints\n"
54
" run qemu with overcommit hints\n"
55
" mem-lock=on|off controls memory lock support (default: off)\n"
55
- " mem-lock=on|off controls memory lock support (default: off)\n"
56
+ " mem-lock-onfault=on|off controls memory lock on fault support (default: off)\n"
56
+ " mem-lock=on|off|on-fault controls memory lock support (default: off)\n"
57
" cpu-pm=on|off controls cpu power management (default: off)\n",
57
" cpu-pm=on|off controls cpu power management (default: off)\n",
58
QEMU_ARCH_ALL)
58
QEMU_ARCH_ALL)
59
SRST
59
SRST
60
``-overcommit mem-lock=on|off``
60
-``-overcommit mem-lock=on|off``
61
+``-overcommit mem-lock=on|off|on-fault``
61
\
62
\
62
+``-overcommit mem-lock-onfault=on|off``
63
+ \
64
``-overcommit cpu-pm=on|off``
63
``-overcommit cpu-pm=on|off``
65
Run qemu with hints about host resource overcommit. The default is
64
Run qemu with hints about host resource overcommit. The default is
66
to assume that host overcommits all resources.
65
to assume that host overcommits all resources.
67
66
68
Locking qemu and guest memory can be enabled via ``mem-lock=on``
67
Locking qemu and guest memory can be enabled via ``mem-lock=on``
69
- (disabled by default). This works when host memory is not
68
- (disabled by default). This works when host memory is not
70
- overcommitted and reduces the worst-case latency for guest.
69
- overcommitted and reduces the worst-case latency for guest.
71
+ or ``mem-lock-onfault=on`` (disabled by default). This works when
70
+ or ``mem-lock=on-fault`` (disabled by default). This works when
72
+ host memory is not overcommitted and reduces the worst-case latency for
71
+ host memory is not overcommitted and reduces the worst-case latency for
73
+ guest. The on-fault option is better for reducing the memory footprint
72
+ guest. The on-fault option is better for reducing the memory footprint
74
+ since it makes allocations lazy, but the pages still get locked in place
73
+ since it makes allocations lazy, but the pages still get locked in place
75
+ once faulted by the guest or QEMU. Note that the two options are mutually
74
+ once faulted by the guest or QEMU. Note that the two options are mutually
76
+ exclusive.
75
+ exclusive.
...
...
92
diff --git a/system/vl.c b/system/vl.c
91
diff --git a/system/vl.c b/system/vl.c
93
index XXXXXXX..XXXXXXX 100644
92
index XXXXXXX..XXXXXXX 100644
94
--- a/system/vl.c
93
--- a/system/vl.c
95
+++ b/system/vl.c
94
+++ b/system/vl.c
96
@@ -XXX,XX +XXX,XX @@ static QemuOptsList qemu_overcommit_opts = {
95
@@ -XXX,XX +XXX,XX @@ static QemuOptsList qemu_overcommit_opts = {
96
.desc = {
97
{
97
.name = "mem-lock",
98
.name = "mem-lock",
98
.type = QEMU_OPT_BOOL,
99
- .type = QEMU_OPT_BOOL,
100
+ .type = QEMU_OPT_STRING,
99
},
101
},
100
+ {
101
+ .name = "mem-lock-onfault",
102
+ .type = QEMU_OPT_BOOL,
103
+ },
104
{
102
{
105
.name = "cpu-pm",
103
.name = "cpu-pm",
106
.type = QEMU_OPT_BOOL,
107
@@ -XXX,XX +XXX,XX @@ static QemuOptsList qemu_run_with_opts = {
104
@@ -XXX,XX +XXX,XX @@ static QemuOptsList qemu_run_with_opts = {
108
105
109
static void realtime_init(void)
106
static void realtime_init(void)
110
{
107
{
111
- if (enable_mlock) {
108
- if (enable_mlock) {
...
...
114
+ if (os_mlock(enable_mlock_onfault) < 0) {
111
+ if (os_mlock(enable_mlock_onfault) < 0) {
115
error_report("locking memory failed");
112
error_report("locking memory failed");
116
exit(1);
113
exit(1);
117
}
114
}
118
@@ -XXX,XX +XXX,XX @@ void qemu_init(int argc, char **argv)
115
@@ -XXX,XX +XXX,XX @@ void qemu_init(int argc, char **argv)
119
if (!opts) {
116
object_option_parse(optarg);
117
break;
118
case QEMU_OPTION_overcommit:
119
- opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
120
- optarg, false);
121
- if (!opts) {
122
+ {
123
+ const char *mem_lock_opt;
124
+
125
+ opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
126
+ optarg, false);
127
+ if (!opts) {
128
+ exit(1);
129
+ }
130
+
131
+ enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", enable_cpu_pm);
132
+
133
+ mem_lock_opt = qemu_opt_get(opts, "mem-lock");
134
+ if (!mem_lock_opt) {
135
+ break;
136
+ }
137
+
138
+ if (strcmp(mem_lock_opt, "on") == 0) {
139
+ enable_mlock = true;
140
+ break;
141
+ }
142
+
143
+ if (strcmp(mem_lock_opt, "off") == 0) {
144
+ enable_mlock = false;
145
+ enable_mlock_onfault = false;
146
+ break;
147
+ }
148
+
149
+ if (strcmp(mem_lock_opt, "on-fault") == 0) {
150
+ enable_mlock_onfault = true;
151
+ break;
152
+ }
153
+
154
+ error_report("parameter 'mem-lock' expects one of "
155
+ "'on', 'off', 'on-fault'");
120
exit(1);
156
exit(1);
121
}
157
}
122
+
158
- enable_mlock = qemu_opt_get_bool(opts, "mem-lock", enable_mlock);
123
enable_mlock = qemu_opt_get_bool(opts, "mem-lock", enable_mlock);
159
- enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", enable_cpu_pm);
124
+ enable_mlock_onfault = qemu_opt_get_bool(opts,
160
- break;
125
+ "mem-lock-onfault",
126
+ enable_mlock_onfault);
127
+ if (enable_mlock && enable_mlock_onfault) {
128
+ error_report("mem-lock and mem-lock-onfault are mutually"
129
+ "exclusive");
130
+ exit(1);
131
+ }
132
+
133
enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", enable_cpu_pm);
134
break;
135
case QEMU_OPTION_compat:
161
case QEMU_OPTION_compat:
162
{
163
CompatPolicy *opts_policy;
136
--
164
--
137
2.34.1
165
2.34.1
diff view generated by jsdifflib