1 | v9 | ||
---|---|---|---|
2 | * Rebase on top of aa3a285b5bc56a4208b3b57d4a55291e9c260107; | ||
3 | * Optimize the error handling in multifd_send_setup(); | ||
4 | * Use the correct way to skip the unit test; | ||
5 | |||
6 | v8 | ||
7 | * Rebase on top of 1cf9bc6eba7506ab6d9de635f224259225f63466; | ||
8 | * Fix the HMP parsing crash in migrate_set_parameter; | ||
9 | * Address initialization/cleanup comments from v7; | ||
10 | |||
11 | v7 | ||
12 | * Rebase on top of f0a5a31c33a8109061c2493e475c8a2f4d022432; | ||
13 | * Fix a bug that crashes QEMU when DSA initialization fails; | ||
14 | * Use a more generalized accel-path to support other accelerators; | ||
15 | * Remove multifd-packet-size in the parameter list; | ||
16 | |||
17 | v6 | ||
18 | * Rebase on top of 838fc0a8769d7cc6edfe50451ba4e3368395f5c1; | ||
19 | * Refactor code to have clean history on all commits; | ||
20 | * Add comments on DSA specific defines about how the value is picked; | ||
21 | * Address all comments from v5 reviews about api defines, questions, etc.; | ||
22 | |||
23 | v5 | ||
24 | * Rebase on top of 39a032cea23e522268519d89bb738974bc43b6f6. | ||
25 | * Rename struct definitions with typedef and CamelCase names; | ||
26 | * Add build and runtime checks about DSA accelerator; | ||
27 | * Address all comments from v4 reviews about typos, licenses, comments, | ||
28 | error reporting, etc. | ||
29 | |||
30 | v4 | ||
31 | * Rebase on top of 85b597413d4370cb168f711192eaef2eb70535ac. | ||
32 | * A separate "multifd zero page checking" patchset was split from this | ||
33 | patchset's v3 and got merged into master. v4 re-applied the remaining | ||
34 | commits on top of that patchset, refactored and re-tested. | ||
35 | https://lore.kernel.org/all/20240311180015.3359271-1-hao.xiang@linux.dev/ | ||
36 | * There is some feedback from v3 that I may have overlooked. | ||
37 | |||
38 | v3 | ||
39 | * Rebase on top of 7425b6277f12e82952cede1f531bfc689bf77fb1. | ||
40 | * Fix error/warning from checkpatch.pl | ||
41 | * Fix use-after-free bug when multifd-dsa-accel option is not set. | ||
42 | * Handle error from dsa_init and correctly propagate the error. | ||
43 | * Remove unnecessary call to dsa_stop. | ||
44 | * Detect availability of DSA feature at compile time. | ||
45 | * Implement a generic batch_task structure and a DSA-specific one, dsa_batch_task. | ||
46 | * Remove all exit() calls and propagate errors correctly. | ||
47 | * Use bytes instead of page count to configure multifd-packet-size option. | ||
48 | |||
1 | v2 | 49 | v2 |
2 | * Rebase on top of 3e01f1147a16ca566694b97eafc941d62fa1e8d8. | 50 | * Rebase on top of 3e01f1147a16ca566694b97eafc941d62fa1e8d8. |
3 | * Leave Juan's changes in their original form instead of squashing them. | 51 | * Leave Juan's changes in their original form instead of squashing them. |
4 | * Add a new commit to refactor the multifd_send_thread function to prepare for introducing the DSA offload functionality. | 52 | * Add a new commit to refactor the multifd_send_thread function to prepare for introducing the DSA offload functionality. |
5 | * Use page count to configure multifd-packet-size option. | 53 | * Use page count to configure multifd-packet-size option. |
... | ... | ||
26 | CPU to DSA accelerator hardware. This patchset implements a solution to offload | 74 | CPU to DSA accelerator hardware. This patchset implements a solution to offload |
27 | QEMU's zero page checking from CPU to DSA accelerator hardware. We gain | 75 | QEMU's zero page checking from CPU to DSA accelerator hardware. We gain |
28 | two benefits from this change: | 76 | two benefits from this change: |
29 | 1. Reduces CPU usage in multifd live migration workflow across all use | 77 | 1. Reduces CPU usage in multifd live migration workflow across all use |
30 | cases. | 78 | cases. |
31 | 2. Reduces migration total time in some use cases. | 79 | 2. Reduces migration total time in some use cases. |
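
For reference, a minimal sketch of the monitor sequence used to drive such a
run (the DSA-related parameter names and values below are assumptions taken
from this series' changelog, not verbatim from the final patches):

  (qemu) migrate_set_capability multifd on
  (qemu) migrate_set_parameter multifd-channels 8
  (qemu) migrate_set_parameter zero-page-detection dsa-accel    # assumed value
  (qemu) migrate_set_parameter accel-path dsa:/dev/dsa/wq0.0    # assumed name/path
  (qemu) migrate -d tcp:<destination>:<port>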
32 | 80 | ||
33 | * Design: | 81 | * Design: |
34 | 82 | ||
35 | These are the logical steps to perform DSA offloading: | 83 | These are the logical steps to perform DSA offloading: |
36 | 1. Configure DSA accelerators and create user space openable DSA work | 84 | 1. Configure DSA accelerators and create user space openable DSA work |
... | ... | ||
105 | CPU MHz: 2538.624 | 153 | CPU MHz: 2538.624 |
106 | CPU max MHz: 3800.0000 | 154 | CPU max MHz: 3800.0000 |
107 | CPU min MHz: 800.0000 | 155 | CPU min MHz: 800.0000 |
108 | 156 | ||
109 | We perform multifd live migration with the following setup: | 157 | We perform multifd live migration with the following setup: |
110 | 1. VM has 100GB memory. | 158 | 1. VM has 100GB memory. |
111 | 2. Use the new migration option multifd-set-normal-page-ratio to control the total | 159 | 2. Use the new migration option multifd-set-normal-page-ratio to control the total |
112 | size of the payload sent over the network. | 160 | size of the payload sent over the network. |
113 | 3. Use 8 multifd channels. | 161 | 3. Use 8 multifd channels. |
114 | 4. Use tcp for live migration. | 162 | 4. Use tcp for live migration. |
115 | 4. Use CPU to perform zero page checking as the baseline. | 163 | 4. Use CPU to perform zero page checking as the baseline. |
... | ... | ||
152 | that is 23% total CPU usage savings. | 200 | that is 23% total CPU usage savings. |
153 | 201 | ||
154 | Latency | 202 | Latency |
155 | |---------------|---------------|---------------|---------------|---------------|---------------| | 203 | |---------------|---------------|---------------|---------------|---------------|---------------| |
156 | | |total time |down time |throughput |transferred-ram|total-ram | | 204 | | |total time |down time |throughput |transferred-ram|total-ram | |
157 | |---------------|---------------|---------------|---------------|---------------|---------------| | 205 | |---------------|---------------|---------------|---------------|---------------|---------------| |
158 | |Baseline |10343 ms |161 ms |41007.00 mbps |51583797 kb |102400520 kb | | 206 | |Baseline |10343 ms |161 ms |41007.00 mbps |51583797 kb |102400520 kb | |
159 | |---------------|---------------|---------------|---------------|---------------|---------------| | 207 | |---------------|---------------|---------------|---------------|---------------|---------------| |
160 | |DSA offload |9535 ms |135 ms |46554.40 mbps |53947545 kb |102400520 kb | | 208 | |DSA offload |9535 ms |135 ms |46554.40 mbps |53947545 kb |102400520 kb | |
161 | |---------------|---------------|---------------|---------------|---------------|---------------| | 209 | |---------------|---------------|---------------|---------------|---------------|---------------| |
162 | 210 | ||
163 | Total time is 8% faster and down time is 16% faster. | 211 | Total time is 8% faster and down time is 16% faster. |
164 | 212 | ||
165 | B) Scenario 2: 100% (100GB) zero pages on a 100GB VM. | 213 | B) Scenario 2: 100% (100GB) zero pages on a 100GB VM. |
... | ... | ||
194 | 8700 msec. That is 22% CPU savings. | 242 | 8700 msec. That is 22% CPU savings. |
195 | 243 | ||
196 | Latency | 244 | Latency |
197 | |--------------------------------------------------------------------------------------------| | 245 | |--------------------------------------------------------------------------------------------| |
198 | | |total time |down time |throughput |transferred-ram|total-ram | | 246 | | |total time |down time |throughput |transferred-ram|total-ram | |
199 | |---------------|---------------|---------------|---------------|---------------|------------| | 247 | |---------------|---------------|---------------|---------------|---------------|------------| |
200 | |Baseline |4867 ms |20 ms |1.51 mbps |565 kb |102400520 kb| | 248 | |Baseline |4867 ms |20 ms |1.51 mbps |565 kb |102400520 kb| |
201 | |---------------|---------------|---------------|---------------|---------------|------------| | 249 | |---------------|---------------|---------------|---------------|---------------|------------| |
202 | |DSA offload |3888 ms |18 ms |1.89 mbps |565 kb |102400520 kb| | 250 | |DSA offload |3888 ms |18 ms |1.89 mbps |565 kb |102400520 kb| |
203 | |---------------|---------------|---------------|---------------|---------------|------------| | 251 | |---------------|---------------|---------------|---------------|---------------|------------| |
204 | 252 | ||
205 | Total time is 20% faster and down time is 10% faster. | 253 | Total time is 20% faster and down time is 10% faster. |
206 | 254 | ||
207 | * Testing: | 255 | * Testing: |
208 | 256 | ||
209 | 1. Added unit tests to cover the added code path in dsa.c | 257 | 1. Added unit tests to cover the added code path in dsa.c |
210 | 2. Added integration tests to cover multifd live migration using DSA | 258 | 2. Added integration tests to cover multifd live migration using DSA |
211 | offloading. | 259 | offloading. |
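
A rough sketch of how these tests are typically run from a QEMU build tree
(standard meson/qtest conventions; the DSA tests additionally need a host
with a user-accessible DSA work queue and a build configured with enqcmd):

  make check-unit                      # includes tests/unit/test-dsa when built
  ./tests/unit/test-dsa                # or run the DSA unit test directly
  QTEST_QEMU_BINARY=./qemu-system-x86_64 ./tests/qtest/migration-test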
212 | 260 | ||
213 | * Patchset | 261 | Hao Xiang (10): |
214 | |||
215 | Apply this patchset on top of commit | ||
216 | f78ea7ddb0e18766ece9fdfe02061744a7afc41b | ||
217 | |||
218 | Hao Xiang (16): | ||
219 | meson: Introduce new instruction set enqcmd to the build system. | 262 | meson: Introduce new instruction set enqcmd to the build system. |
220 | util/dsa: Add dependency idxd. | ||
221 | util/dsa: Implement DSA device start and stop logic. | 263 | util/dsa: Implement DSA device start and stop logic. |
222 | util/dsa: Implement DSA task enqueue and dequeue. | 264 | util/dsa: Implement DSA task enqueue and dequeue. |
223 | util/dsa: Implement DSA task asynchronous completion thread model. | 265 | util/dsa: Implement DSA task asynchronous completion thread model. |
224 | util/dsa: Implement zero page checking in DSA task. | 266 | util/dsa: Implement zero page checking in DSA task. |
225 | util/dsa: Implement DSA task asynchronous submission and wait for | 267 | util/dsa: Implement DSA task asynchronous submission and wait for |
226 | completion. | 268 | completion. |
227 | migration/multifd: Add new migration option for multifd DSA | 269 | migration/multifd: Add new migration option for multifd DSA |
228 | offloading. | 270 | offloading. |
229 | migration/multifd: Prepare to introduce DSA acceleration on the | ||
230 | multifd path. | ||
231 | migration/multifd: Enable DSA offloading in multifd sender path. | 271 | migration/multifd: Enable DSA offloading in multifd sender path. |
232 | migration/multifd: Add test hook to set normal page ratio. | ||
233 | migration/multifd: Enable set normal page ratio test hook in multifd. | ||
234 | migration/multifd: Add migration option set packet size. | ||
235 | migration/multifd: Enable set packet size migration option. | ||
236 | util/dsa: Add unit test coverage for Intel DSA task submission and | 272 | util/dsa: Add unit test coverage for Intel DSA task submission and |
237 | completion. | 273 | completion. |
238 | migration/multifd: Add integration tests for multifd with Intel DSA | 274 | migration/multifd: Add integration tests for multifd with Intel DSA |
239 | offloading. | 275 | offloading. |
240 | 276 | ||
241 | Juan Quintela (4): | 277 | Yichen Wang (1): |
242 | multifd: Add capability to enable/disable zero_page | 278 | util/dsa: Add idxd into linux header copy list. |
243 | multifd: Support for zero pages transmission | 279 | |
244 | multifd: Zero pages transmission | 280 | Yuan Liu (1): |
245 | So we use multifd to transmit zero pages. | 281 | migration/doc: Add DSA zero page detection doc |
246 | 282 | ||
247 | include/qemu/dsa.h | 119 ++++ | 283 | .../migration/dsa-zero-page-detection.rst | 290 +++++ |
248 | linux-headers/linux/idxd.h | 356 ++++++++++ | 284 | docs/devel/migration/features.rst | 1 + |
249 | meson.build | 2 + | 285 | hmp-commands.hx | 2 +- |
250 | meson_options.txt | 2 + | 286 | include/qemu/dsa.h | 190 +++ |
251 | migration/migration-hmp-cmds.c | 22 + | 287 | meson.build | 14 + |
252 | migration/multifd-zlib.c | 8 +- | 288 | meson_options.txt | 2 + |
253 | migration/multifd-zstd.c | 8 +- | 289 | migration/migration-hmp-cmds.c | 20 +- |
254 | migration/multifd.c | 203 +++++- | 290 | migration/multifd-zero-page.c | 149 ++- |
255 | migration/multifd.h | 28 +- | 291 | migration/multifd.c | 23 +- |
256 | migration/options.c | 107 +++ | 292 | migration/multifd.h | 6 + |
257 | migration/options.h | 4 + | 293 | migration/options.c | 43 + |
258 | migration/ram.c | 45 +- | 294 | migration/options.h | 2 + |
259 | migration/trace-events | 8 +- | 295 | qapi/migration.json | 32 +- |
260 | qapi/migration.json | 53 +- | 296 | scripts/meson-buildoptions.sh | 3 + |
261 | scripts/meson-buildoptions.sh | 3 + | 297 | scripts/update-linux-headers.sh | 2 +- |
262 | tests/qtest/migration-test.c | 77 ++- | 298 | tests/qtest/meson.build | 10 +- |
263 | tests/unit/meson.build | 6 + | 299 | tests/qtest/migration-test.c | 3 + |
264 | tests/unit/test-dsa.c | 466 +++++++++++++ | 300 | tests/qtest/migration/dsa-tests.c | 59 + |
265 | util/dsa.c | 1132 ++++++++++++++++++++++++++++++++ | 301 | tests/qtest/migration/framework.h | 1 + |
266 | util/meson.build | 1 + | 302 | tests/unit/meson.build | 6 + |
267 | 20 files changed, 2612 insertions(+), 38 deletions(-) | 303 | tests/unit/test-dsa.c | 504 ++++++++ |
304 | util/dsa.c | 1112 +++++++++++++++++ | ||
305 | util/meson.build | 3 + | ||
306 | 23 files changed, 2453 insertions(+), 24 deletions(-) | ||
307 | create mode 100644 docs/devel/migration/dsa-zero-page-detection.rst | ||
268 | create mode 100644 include/qemu/dsa.h | 308 | create mode 100644 include/qemu/dsa.h |
269 | create mode 100644 linux-headers/linux/idxd.h | 309 | create mode 100644 tests/qtest/migration/dsa-tests.c |
270 | create mode 100644 tests/unit/test-dsa.c | 310 | create mode 100644 tests/unit/test-dsa.c |
271 | create mode 100644 util/dsa.c | 311 | create mode 100644 util/dsa.c |
272 | 312 | ||
273 | -- | 313 | -- |
274 | 2.30.2 | 314 | Yichen Wang |
Deleted patch | |||
---|---|---|---|
1 | From: Juan Quintela <quintela@redhat.com> | ||
2 | 1 | ||
3 | We have to enable it by default until we introduce the new code. | ||
4 | |||
5 | Signed-off-by: Juan Quintela <quintela@redhat.com> | ||
6 | --- | ||
7 | migration/options.c | 13 +++++++++++++ | ||
8 | migration/options.h | 1 + | ||
9 | qapi/migration.json | 8 +++++++- | ||
10 | 3 files changed, 21 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/migration/options.c b/migration/options.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/migration/options.c | ||
15 | +++ b/migration/options.c | ||
16 | @@ -XXX,XX +XXX,XX @@ Property migration_properties[] = { | ||
17 | DEFINE_PROP_MIG_CAP("x-switchover-ack", | ||
18 | MIGRATION_CAPABILITY_SWITCHOVER_ACK), | ||
19 | DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT), | ||
20 | + DEFINE_PROP_MIG_CAP("main-zero-page", | ||
21 | + MIGRATION_CAPABILITY_MAIN_ZERO_PAGE), | ||
22 | DEFINE_PROP_END_OF_LIST(), | ||
23 | }; | ||
24 | |||
25 | @@ -XXX,XX +XXX,XX @@ bool migrate_multifd(void) | ||
26 | return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; | ||
27 | } | ||
28 | |||
29 | +bool migrate_use_main_zero_page(void) | ||
30 | +{ | ||
31 | + //MigrationState *s; | ||
32 | + | ||
33 | + //s = migrate_get_current(); | ||
34 | + | ||
35 | + // We will enable this when we add the right code. | ||
36 | + // return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; | ||
37 | + return true; | ||
38 | +} | ||
39 | + | ||
40 | bool migrate_pause_before_switchover(void) | ||
41 | { | ||
42 | MigrationState *s = migrate_get_current(); | ||
43 | diff --git a/migration/options.h b/migration/options.h | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/migration/options.h | ||
46 | +++ b/migration/options.h | ||
47 | @@ -XXX,XX +XXX,XX @@ int migrate_multifd_channels(void); | ||
48 | MultiFDCompression migrate_multifd_compression(void); | ||
49 | int migrate_multifd_zlib_level(void); | ||
50 | int migrate_multifd_zstd_level(void); | ||
51 | +bool migrate_use_main_zero_page(void); | ||
52 | uint8_t migrate_throttle_trigger_threshold(void); | ||
53 | const char *migrate_tls_authz(void); | ||
54 | const char *migrate_tls_creds(void); | ||
55 | diff --git a/qapi/migration.json b/qapi/migration.json | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/qapi/migration.json | ||
58 | +++ b/qapi/migration.json | ||
59 | @@ -XXX,XX +XXX,XX @@ | ||
60 | # and can result in more stable read performance. Requires KVM | ||
61 | # with accelerator property "dirty-ring-size" set. (Since 8.1) | ||
62 | # | ||
63 | +# | ||
64 | +# @main-zero-page: If enabled, the detection of zero pages will be | ||
65 | +# done on the main thread. Otherwise it is done on | ||
66 | +# the multifd threads. | ||
67 | +# (since 8.2) | ||
68 | +# | ||
69 | # Features: | ||
70 | # | ||
71 | # @deprecated: Member @block is deprecated. Use blockdev-mirror with | ||
72 | @@ -XXX,XX +XXX,XX @@ | ||
73 | { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, | ||
74 | 'validate-uuid', 'background-snapshot', | ||
75 | 'zero-copy-send', 'postcopy-preempt', 'switchover-ack', | ||
76 | - 'dirty-limit'] } | ||
77 | + 'dirty-limit', 'main-zero-page'] } | ||
78 | |||
79 | ## | ||
80 | # @MigrationCapabilityStatus: | ||
81 | -- | ||
82 | 2.30.2 |
Deleted patch | |||
---|---|---|---|
1 | From: Juan Quintela <quintela@redhat.com> | ||
2 | 1 | ||
3 | This patch adds counters and similar fields. Logic will be added in the | ||
4 | following patch. | ||
5 | |||
6 | Signed-off-by: Juan Quintela <quintela@redhat.com> | ||
7 | --- | ||
8 | migration/multifd.c | 37 ++++++++++++++++++++++++++++++------- | ||
9 | migration/multifd.h | 17 ++++++++++++++++- | ||
10 | migration/trace-events | 8 ++++---- | ||
11 | 3 files changed, 50 insertions(+), 12 deletions(-) | ||
12 | |||
13 | diff --git a/migration/multifd.c b/migration/multifd.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/migration/multifd.c | ||
16 | +++ b/migration/multifd.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void multifd_send_fill_packet(MultiFDSendParams *p) | ||
18 | packet->normal_pages = cpu_to_be32(p->normal_num); | ||
19 | packet->next_packet_size = cpu_to_be32(p->next_packet_size); | ||
20 | packet->packet_num = cpu_to_be64(p->packet_num); | ||
21 | + packet->zero_pages = cpu_to_be32(p->zero_num); | ||
22 | |||
23 | if (p->pages->block) { | ||
24 | strncpy(packet->ramblock, p->pages->block->idstr, 256); | ||
25 | @@ -XXX,XX +XXX,XX @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) | ||
26 | p->next_packet_size = be32_to_cpu(packet->next_packet_size); | ||
27 | p->packet_num = be64_to_cpu(packet->packet_num); | ||
28 | |||
29 | - if (p->normal_num == 0) { | ||
30 | + p->zero_num = be32_to_cpu(packet->zero_pages); | ||
31 | + if (p->zero_num > packet->pages_alloc - p->normal_num) { | ||
32 | + error_setg(errp, "multifd: received packet " | ||
33 | + "with %u zero pages and expected maximum pages are %u", | ||
34 | + p->zero_num, packet->pages_alloc - p->normal_num) ; | ||
35 | + return -1; | ||
36 | + } | ||
37 | + | ||
38 | + if (p->normal_num == 0 && p->zero_num == 0) { | ||
39 | return 0; | ||
40 | } | ||
41 | |||
42 | @@ -XXX,XX +XXX,XX @@ static int multifd_send_pages(QEMUFile *f) | ||
43 | p->packet_num = multifd_send_state->packet_num++; | ||
44 | multifd_send_state->pages = p->pages; | ||
45 | p->pages = pages; | ||
46 | + | ||
47 | qemu_mutex_unlock(&p->mutex); | ||
48 | qemu_sem_post(&p->sem); | ||
49 | |||
50 | @@ -XXX,XX +XXX,XX @@ void multifd_save_cleanup(void) | ||
51 | p->iov = NULL; | ||
52 | g_free(p->normal); | ||
53 | p->normal = NULL; | ||
54 | + g_free(p->zero); | ||
55 | + p->zero = NULL; | ||
56 | multifd_send_state->ops->send_cleanup(p, &local_err); | ||
57 | if (local_err) { | ||
58 | migrate_set_error(migrate_get_current(), local_err); | ||
59 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
60 | uint64_t packet_num = p->packet_num; | ||
61 | uint32_t flags; | ||
62 | p->normal_num = 0; | ||
63 | + p->zero_num = 0; | ||
64 | |||
65 | if (use_zero_copy_send) { | ||
66 | p->iovs_num = 0; | ||
67 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
68 | p->flags = 0; | ||
69 | p->num_packets++; | ||
70 | p->total_normal_pages += p->normal_num; | ||
71 | + p->total_zero_pages += p->zero_num; | ||
72 | p->pages->num = 0; | ||
73 | p->pages->block = NULL; | ||
74 | qemu_mutex_unlock(&p->mutex); | ||
75 | |||
76 | - trace_multifd_send(p->id, packet_num, p->normal_num, flags, | ||
77 | - p->next_packet_size); | ||
78 | + trace_multifd_send(p->id, packet_num, p->normal_num, p->zero_num, | ||
79 | + flags, p->next_packet_size); | ||
80 | |||
81 | if (use_zero_copy_send) { | ||
82 | /* Send header first, without zerocopy */ | ||
83 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
84 | |||
85 | stat64_add(&mig_stats.multifd_bytes, | ||
86 | p->next_packet_size + p->packet_len); | ||
87 | + stat64_add(&mig_stats.normal_pages, p->normal_num); | ||
88 | + stat64_add(&mig_stats.zero_pages, p->zero_num); | ||
89 | p->next_packet_size = 0; | ||
90 | qemu_mutex_lock(&p->mutex); | ||
91 | p->pending_job--; | ||
92 | @@ -XXX,XX +XXX,XX @@ out: | ||
93 | |||
94 | rcu_unregister_thread(); | ||
95 | migration_threads_remove(thread); | ||
96 | - trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages); | ||
97 | + trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages, | ||
98 | + p->total_zero_pages); | ||
99 | |||
100 | return NULL; | ||
101 | } | ||
102 | @@ -XXX,XX +XXX,XX @@ int multifd_save_setup(Error **errp) | ||
103 | p->normal = g_new0(ram_addr_t, page_count); | ||
104 | p->page_size = qemu_target_page_size(); | ||
105 | p->page_count = page_count; | ||
106 | + p->zero = g_new0(ram_addr_t, page_count); | ||
107 | |||
108 | if (migrate_zero_copy_send()) { | ||
109 | p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; | ||
110 | @@ -XXX,XX +XXX,XX @@ void multifd_load_cleanup(void) | ||
111 | p->iov = NULL; | ||
112 | g_free(p->normal); | ||
113 | p->normal = NULL; | ||
114 | + g_free(p->zero); | ||
115 | + p->zero = NULL; | ||
116 | multifd_recv_state->ops->recv_cleanup(p); | ||
117 | } | ||
118 | qemu_sem_destroy(&multifd_recv_state->sem_sync); | ||
119 | @@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque) | ||
120 | flags = p->flags; | ||
121 | /* recv methods don't know how to handle the SYNC flag */ | ||
122 | p->flags &= ~MULTIFD_FLAG_SYNC; | ||
123 | - trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags, | ||
124 | - p->next_packet_size); | ||
125 | + trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->zero_num, | ||
126 | + flags, p->next_packet_size); | ||
127 | p->num_packets++; | ||
128 | p->total_normal_pages += p->normal_num; | ||
129 | + p->total_zero_pages += p->zero_num; | ||
130 | qemu_mutex_unlock(&p->mutex); | ||
131 | |||
132 | if (p->normal_num) { | ||
133 | @@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque) | ||
134 | qemu_mutex_unlock(&p->mutex); | ||
135 | |||
136 | rcu_unregister_thread(); | ||
137 | - trace_multifd_recv_thread_end(p->id, p->num_packets, p->total_normal_pages); | ||
138 | + trace_multifd_recv_thread_end(p->id, p->num_packets, p->total_normal_pages, | ||
139 | + p->total_zero_pages); | ||
140 | |||
141 | return NULL; | ||
142 | } | ||
143 | @@ -XXX,XX +XXX,XX @@ int multifd_load_setup(Error **errp) | ||
144 | p->normal = g_new0(ram_addr_t, page_count); | ||
145 | p->page_count = page_count; | ||
146 | p->page_size = qemu_target_page_size(); | ||
147 | + p->zero = g_new0(ram_addr_t, page_count); | ||
148 | } | ||
149 | |||
150 | for (i = 0; i < thread_count; i++) { | ||
151 | diff --git a/migration/multifd.h b/migration/multifd.h | ||
152 | index XXXXXXX..XXXXXXX 100644 | ||
153 | --- a/migration/multifd.h | ||
154 | +++ b/migration/multifd.h | ||
155 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
156 | /* size of the next packet that contains pages */ | ||
157 | uint32_t next_packet_size; | ||
158 | uint64_t packet_num; | ||
159 | - uint64_t unused[4]; /* Reserved for future use */ | ||
160 | + /* zero pages */ | ||
161 | + uint32_t zero_pages; | ||
162 | + uint32_t unused32[1]; /* Reserved for future use */ | ||
163 | + uint64_t unused64[3]; /* Reserved for future use */ | ||
164 | char ramblock[256]; | ||
165 | uint64_t offset[]; | ||
166 | } __attribute__((packed)) MultiFDPacket_t; | ||
167 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
168 | uint64_t num_packets; | ||
169 | /* non zero pages sent through this channel */ | ||
170 | uint64_t total_normal_pages; | ||
171 | + /* zero pages sent through this channel */ | ||
172 | + uint64_t total_zero_pages; | ||
173 | /* buffers to send */ | ||
174 | struct iovec *iov; | ||
175 | /* number of iovs used */ | ||
176 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
177 | ram_addr_t *normal; | ||
178 | /* num of non zero pages */ | ||
179 | uint32_t normal_num; | ||
180 | + /* Pages that are zero */ | ||
181 | + ram_addr_t *zero; | ||
182 | + /* num of zero pages */ | ||
183 | + uint32_t zero_num; | ||
184 | /* used for compression methods */ | ||
185 | void *data; | ||
186 | } MultiFDSendParams; | ||
187 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
188 | uint8_t *host; | ||
189 | /* non zero pages recv through this channel */ | ||
190 | uint64_t total_normal_pages; | ||
191 | + /* zero pages recv through this channel */ | ||
192 | + uint64_t total_zero_pages; | ||
193 | /* buffers to recv */ | ||
194 | struct iovec *iov; | ||
195 | /* Pages that are not zero */ | ||
196 | ram_addr_t *normal; | ||
197 | /* num of non zero pages */ | ||
198 | uint32_t normal_num; | ||
199 | + /* Pages that are zero */ | ||
200 | + ram_addr_t *zero; | ||
201 | + /* num of zero pages */ | ||
202 | + uint32_t zero_num; | ||
203 | /* used for de-compression methods */ | ||
204 | void *data; | ||
205 | } MultiFDRecvParams; | ||
206 | diff --git a/migration/trace-events b/migration/trace-events | ||
207 | index XXXXXXX..XXXXXXX 100644 | ||
208 | --- a/migration/trace-events | ||
209 | +++ b/migration/trace-events | ||
210 | @@ -XXX,XX +XXX,XX @@ postcopy_preempt_reset_channel(void) "" | ||
211 | # multifd.c | ||
212 | multifd_new_send_channel_async(uint8_t id) "channel %u" | ||
213 | multifd_new_send_channel_async_error(uint8_t id, void *err) "channel=%u err=%p" | ||
214 | -multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" | ||
215 | +multifd_recv(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t zero, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u zero pages %u flags 0x%x next packet size %u" | ||
216 | multifd_recv_new_channel(uint8_t id) "channel %u" | ||
217 | multifd_recv_sync_main(long packet_num) "packet num %ld" | ||
218 | multifd_recv_sync_main_signal(uint8_t id) "channel %u" | ||
219 | multifd_recv_sync_main_wait(uint8_t id) "channel %u" | ||
220 | multifd_recv_terminate_threads(bool error) "error %d" | ||
221 | -multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 | ||
222 | +multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages, uint64_t zero_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 " zero pages %" PRIu64 | ||
223 | multifd_recv_thread_start(uint8_t id) "%u" | ||
224 | -multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u flags 0x%x next packet size %u" | ||
225 | +multifd_send(uint8_t id, uint64_t packet_num, uint32_t normalpages, uint32_t zero_pages, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u zero pages %u flags 0x%x next packet size %u" | ||
226 | multifd_send_error(uint8_t id) "channel %u" | ||
227 | multifd_send_sync_main(long packet_num) "packet num %ld" | ||
228 | multifd_send_sync_main_signal(uint8_t id) "channel %u" | ||
229 | multifd_send_sync_main_wait(uint8_t id) "channel %u" | ||
230 | multifd_send_terminate_threads(bool error) "error %d" | ||
231 | -multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 | ||
232 | +multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages, uint64_t zero_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 " zero pages %" PRIu64 | ||
233 | multifd_send_thread_start(uint8_t id) "%u" | ||
234 | multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" | ||
235 | multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" | ||
236 | -- | ||
237 | 2.30.2 |
Deleted patch | |||
---|---|---|---|
1 | From: Juan Quintela <quintela@redhat.com> | ||
2 | 1 | ||
3 | This implements the zero page dection and handling. | ||
4 | |||
5 | Signed-off-by: Juan Quintela <quintela@redhat.com> | ||
6 | --- | ||
7 | migration/multifd.c | 41 +++++++++++++++++++++++++++++++++++++++-- | ||
8 | migration/multifd.h | 5 +++++ | ||
9 | 2 files changed, 44 insertions(+), 2 deletions(-) | ||
10 | |||
11 | diff --git a/migration/multifd.c b/migration/multifd.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/migration/multifd.c | ||
14 | +++ b/migration/multifd.c | ||
15 | @@ -XXX,XX +XXX,XX @@ | ||
16 | */ | ||
17 | |||
18 | #include "qemu/osdep.h" | ||
19 | +#include "qemu/cutils.h" | ||
20 | #include "qemu/rcu.h" | ||
21 | #include "exec/target_page.h" | ||
22 | #include "sysemu/sysemu.h" | ||
23 | @@ -XXX,XX +XXX,XX @@ static void multifd_send_fill_packet(MultiFDSendParams *p) | ||
24 | |||
25 | packet->offset[i] = cpu_to_be64(temp); | ||
26 | } | ||
27 | + for (i = 0; i < p->zero_num; i++) { | ||
28 | + /* there are architectures where ram_addr_t is 32 bit */ | ||
29 | + uint64_t temp = p->zero[i]; | ||
30 | + | ||
31 | + packet->offset[p->normal_num + i] = cpu_to_be64(temp); | ||
32 | + } | ||
33 | } | ||
34 | |||
35 | static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) | ||
36 | @@ -XXX,XX +XXX,XX @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) | ||
37 | p->normal[i] = offset; | ||
38 | } | ||
39 | |||
40 | + for (i = 0; i < p->zero_num; i++) { | ||
41 | + uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]); | ||
42 | + | ||
43 | + if (offset > (p->block->used_length - p->page_size)) { | ||
44 | + error_setg(errp, "multifd: offset too long %" PRIu64 | ||
45 | + " (max " RAM_ADDR_FMT ")", | ||
46 | + offset, p->block->used_length); | ||
47 | + return -1; | ||
48 | + } | ||
49 | + p->zero[i] = offset; | ||
50 | + } | ||
51 | + | ||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
56 | MultiFDSendParams *p = opaque; | ||
57 | MigrationThread *thread = NULL; | ||
58 | Error *local_err = NULL; | ||
59 | + /* qemu older than 8.2 don't understand zero page on multifd channel */ | ||
60 | + bool use_zero_page = !migrate_use_main_zero_page(); | ||
61 | int ret = 0; | ||
62 | bool use_zero_copy_send = migrate_zero_copy_send(); | ||
63 | |||
64 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
65 | qemu_mutex_lock(&p->mutex); | ||
66 | |||
67 | if (p->pending_job) { | ||
68 | + RAMBlock *rb = p->pages->block; | ||
69 | uint64_t packet_num = p->packet_num; | ||
70 | uint32_t flags; | ||
71 | p->normal_num = 0; | ||
72 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
73 | } | ||
74 | |||
75 | for (int i = 0; i < p->pages->num; i++) { | ||
76 | - p->normal[p->normal_num] = p->pages->offset[i]; | ||
77 | - p->normal_num++; | ||
78 | + uint64_t offset = p->pages->offset[i]; | ||
79 | + if (use_zero_page && | ||
80 | + buffer_is_zero(rb->host + offset, p->page_size)) { | ||
81 | + p->zero[p->zero_num] = offset; | ||
82 | + p->zero_num++; | ||
83 | + ram_release_page(rb->idstr, offset); | ||
84 | + } else { | ||
85 | + p->normal[p->normal_num] = offset; | ||
86 | + p->normal_num++; | ||
87 | + } | ||
88 | } | ||
89 | |||
90 | if (p->normal_num) { | ||
91 | @@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque) | ||
92 | } | ||
93 | } | ||
94 | |||
95 | + for (int i = 0; i < p->zero_num; i++) { | ||
96 | + void *page = p->host + p->zero[i]; | ||
97 | + if (!buffer_is_zero(page, p->page_size)) { | ||
98 | + memset(page, 0, p->page_size); | ||
99 | + } | ||
100 | + } | ||
101 | + | ||
102 | if (flags & MULTIFD_FLAG_SYNC) { | ||
103 | qemu_sem_post(&multifd_recv_state->sem_sync); | ||
104 | qemu_sem_wait(&p->sem_sync); | ||
105 | diff --git a/migration/multifd.h b/migration/multifd.h | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/migration/multifd.h | ||
108 | +++ b/migration/multifd.h | ||
109 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
110 | uint32_t unused32[1]; /* Reserved for future use */ | ||
111 | uint64_t unused64[3]; /* Reserved for future use */ | ||
112 | char ramblock[256]; | ||
113 | + /* | ||
114 | + * This array contains the pointers to: | ||
115 | + * - normal pages (initial normal_pages entries) | ||
116 | + * - zero pages (following zero_pages entries) | ||
117 | + */ | ||
118 | uint64_t offset[]; | ||
119 | } __attribute__((packed)) MultiFDPacket_t; | ||
120 | |||
121 | -- | ||
122 | 2.30.2 |
Deleted patch | |||
---|---|---|---|
1 | From: Juan Quintela <quintela@redhat.com> | ||
2 | 1 | ||
3 | Signed-off-by: Juan Quintela <quintela@redhat.com> | ||
4 | Reviewed-by: Leonardo Bras <leobras@redhat.com> | ||
5 | --- | ||
6 | migration/multifd.c | 7 ++++--- | ||
7 | migration/options.c | 13 +++++++------ | ||
8 | migration/ram.c | 45 ++++++++++++++++++++++++++++++++++++++------- | ||
9 | qapi/migration.json | 1 - | ||
10 | 4 files changed, 49 insertions(+), 17 deletions(-) | ||
11 | |||
12 | diff --git a/migration/multifd.c b/migration/multifd.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/migration/multifd.c | ||
15 | +++ b/migration/multifd.c | ||
16 | @@ -XXX,XX +XXX,XX @@ | ||
17 | #include "qemu/osdep.h" | ||
18 | #include "qemu/cutils.h" | ||
19 | #include "qemu/rcu.h" | ||
20 | +#include "qemu/cutils.h" | ||
21 | #include "exec/target_page.h" | ||
22 | #include "sysemu/sysemu.h" | ||
23 | #include "exec/ramblock.h" | ||
24 | @@ -XXX,XX +XXX,XX @@ static int multifd_send_pages(QEMUFile *f) | ||
25 | p->packet_num = multifd_send_state->packet_num++; | ||
26 | multifd_send_state->pages = p->pages; | ||
27 | p->pages = pages; | ||
28 | - | ||
29 | qemu_mutex_unlock(&p->mutex); | ||
30 | qemu_sem_post(&p->sem); | ||
31 | |||
32 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
33 | MigrationThread *thread = NULL; | ||
34 | Error *local_err = NULL; | ||
35 | /* qemu older than 8.2 don't understand zero page on multifd channel */ | ||
36 | - bool use_zero_page = !migrate_use_main_zero_page(); | ||
37 | + bool use_multifd_zero_page = !migrate_use_main_zero_page(); | ||
38 | int ret = 0; | ||
39 | bool use_zero_copy_send = migrate_zero_copy_send(); | ||
40 | |||
41 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
42 | RAMBlock *rb = p->pages->block; | ||
43 | uint64_t packet_num = p->packet_num; | ||
44 | uint32_t flags; | ||
45 | + | ||
46 | p->normal_num = 0; | ||
47 | p->zero_num = 0; | ||
48 | |||
49 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
50 | |||
51 | for (int i = 0; i < p->pages->num; i++) { | ||
52 | uint64_t offset = p->pages->offset[i]; | ||
53 | - if (use_zero_page && | ||
54 | + if (use_multifd_zero_page && | ||
55 | buffer_is_zero(rb->host + offset, p->page_size)) { | ||
56 | p->zero[p->zero_num] = offset; | ||
57 | p->zero_num++; | ||
58 | diff --git a/migration/options.c b/migration/options.c | ||
59 | index XXXXXXX..XXXXXXX 100644 | ||
60 | --- a/migration/options.c | ||
61 | +++ b/migration/options.c | ||
62 | @@ -XXX,XX +XXX,XX @@ Property migration_properties[] = { | ||
63 | DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), | ||
64 | DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), | ||
65 | DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), | ||
66 | + DEFINE_PROP_MIG_CAP("x-main-zero-page", MIGRATION_CAPABILITY_MAIN_ZERO_PAGE), | ||
67 | DEFINE_PROP_MIG_CAP("x-background-snapshot", | ||
68 | MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), | ||
69 | #ifdef CONFIG_LINUX | ||
70 | @@ -XXX,XX +XXX,XX @@ bool migrate_multifd(void) | ||
71 | |||
72 | bool migrate_use_main_zero_page(void) | ||
73 | { | ||
74 | - //MigrationState *s; | ||
75 | - | ||
76 | - //s = migrate_get_current(); | ||
77 | + MigrationState *s = migrate_get_current(); | ||
78 | |||
79 | - // We will enable this when we add the right code. | ||
80 | - // return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; | ||
81 | - return true; | ||
82 | + return s->capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; | ||
83 | } | ||
84 | |||
85 | bool migrate_pause_before_switchover(void) | ||
86 | @@ -XXX,XX +XXX,XX @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, | ||
87 | MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, | ||
88 | MIGRATION_CAPABILITY_RETURN_PATH, | ||
89 | MIGRATION_CAPABILITY_MULTIFD, | ||
90 | + MIGRATION_CAPABILITY_MAIN_ZERO_PAGE, | ||
91 | MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, | ||
92 | MIGRATION_CAPABILITY_AUTO_CONVERGE, | ||
93 | MIGRATION_CAPABILITY_RELEASE_RAM, | ||
94 | @@ -XXX,XX +XXX,XX @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) | ||
95 | error_setg(errp, "Postcopy is not yet compatible with multifd"); | ||
96 | return false; | ||
97 | } | ||
98 | + if (new_caps[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]) { | ||
99 | + error_setg(errp, "Postcopy is not yet compatible with main zero copy"); | ||
100 | + } | ||
101 | } | ||
102 | |||
103 | if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { | ||
104 | diff --git a/migration/ram.c b/migration/ram.c | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/migration/ram.c | ||
107 | +++ b/migration/ram.c | ||
108 | @@ -XXX,XX +XXX,XX @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) | ||
109 | if (save_zero_page(rs, pss, offset)) { | ||
110 | return 1; | ||
111 | } | ||
112 | - | ||
113 | /* | ||
114 | - * Do not use multifd in postcopy as one whole host page should be | ||
115 | - * placed. Meanwhile postcopy requires atomic update of pages, so even | ||
116 | - * if host page size == guest page size the dest guest during run may | ||
117 | - * still see partially copied pages which is data corruption. | ||
118 | + * Do not use multifd for: | ||
119 | + * 1. Compression as the first page in the new block should be posted out | ||
120 | + * before sending the compressed page | ||
121 | + * 2. In postcopy as one whole host page should be placed | ||
122 | */ | ||
123 | - if (migrate_multifd() && !migration_in_postcopy()) { | ||
124 | + if (!migrate_compress() && migrate_multifd() && !migration_in_postcopy()) { | ||
125 | + return ram_save_multifd_page(pss->pss_channel, block, offset); | ||
126 | + } | ||
127 | + | ||
128 | + return ram_save_page(rs, pss); | ||
129 | +} | ||
130 | + | ||
131 | +/** | ||
132 | + * ram_save_target_page_multifd: save one target page | ||
133 | + * | ||
134 | + * Returns the number of pages written | ||
135 | + * | ||
136 | + * @rs: current RAM state | ||
137 | + * @pss: data about the page we want to send | ||
138 | + */ | ||
139 | +static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss) | ||
140 | +{ | ||
141 | + RAMBlock *block = pss->block; | ||
142 | + ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; | ||
143 | + int res; | ||
144 | + | ||
145 | + if (!migration_in_postcopy()) { | ||
146 | return ram_save_multifd_page(pss->pss_channel, block, offset); | ||
147 | } | ||
148 | |||
149 | + res = save_zero_page(rs, pss, offset); | ||
150 | + if (res > 0) { | ||
151 | + return res; | ||
152 | + } | ||
153 | + | ||
154 | return ram_save_page(rs, pss); | ||
155 | } | ||
156 | |||
157 | @@ -XXX,XX +XXX,XX @@ static int ram_save_setup(QEMUFile *f, void *opaque) | ||
158 | } | ||
159 | |||
160 | migration_ops = g_malloc0(sizeof(MigrationOps)); | ||
161 | - migration_ops->ram_save_target_page = ram_save_target_page_legacy; | ||
162 | + | ||
163 | + if (migrate_multifd() && !migrate_use_main_zero_page()) { | ||
164 | + migration_ops->ram_save_target_page = ram_save_target_page_multifd; | ||
165 | + } else { | ||
166 | + migration_ops->ram_save_target_page = ram_save_target_page_legacy; | ||
167 | + } | ||
168 | |||
169 | qemu_mutex_unlock_iothread(); | ||
170 | + | ||
171 | ret = multifd_send_sync_main(f); | ||
172 | qemu_mutex_lock_iothread(); | ||
173 | if (ret < 0) { | ||
174 | diff --git a/qapi/migration.json b/qapi/migration.json | ||
175 | index XXXXXXX..XXXXXXX 100644 | ||
176 | --- a/qapi/migration.json | ||
177 | +++ b/qapi/migration.json | ||
178 | @@ -XXX,XX +XXX,XX @@ | ||
179 | # and can result in more stable read performance. Requires KVM | ||
180 | # with accelerator property "dirty-ring-size" set. (Since 8.1) | ||
181 | # | ||
182 | -# | ||
183 | # @main-zero-page: If enabled, the detection of zero pages will be | ||
184 | # done on the main thread. Otherwise it is done on | ||
185 | # the multifd threads. | ||
186 | -- | ||
187 | 2.30.2 |
1 | From: Hao Xiang <hao.xiang@linux.dev> | ||
---|---|---|---|
2 | |||
1 | Enable instruction set enqcmd in build. | 3 | Enable instruction set enqcmd in build. |
2 | 4 | ||
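The option is disabled by default; as a quick usage sketch, a DSA-capable
build can be configured with the flag this patch introduces:

  ./configure --enable-enqcmd
  # or, when driving meson directly:
  meson setup build -Denqcmd=enabled
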
3 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | 5 | Signed-off-by: Hao Xiang <hao.xiang@linux.dev> |
6 | Signed-off-by: Yichen Wang <yichen.wang@bytedance.com> | ||
7 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
4 | --- | 8 | --- |
5 | meson.build | 2 ++ | 9 | meson.build | 14 ++++++++++++++ |
6 | meson_options.txt | 2 ++ | 10 | meson_options.txt | 2 ++ |
7 | scripts/meson-buildoptions.sh | 3 +++ | 11 | scripts/meson-buildoptions.sh | 3 +++ |
8 | 3 files changed, 7 insertions(+) | 12 | 3 files changed, 19 insertions(+) |
9 | 13 | ||
10 | diff --git a/meson.build b/meson.build | 14 | diff --git a/meson.build b/meson.build |
11 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/meson.build | 16 | --- a/meson.build |
13 | +++ b/meson.build | 17 | +++ b/meson.build |
14 | @@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \ | 18 | @@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \ |
15 | int main(int argc, char *argv[]) { return bar(argv[0]); } | 19 | int main(int argc, char *argv[]) { return bar(argv[0]); } |
16 | '''), error_message: 'AVX512BW not available').allowed()) | 20 | '''), error_message: 'AVX512BW not available').allowed()) |
17 | 21 | ||
18 | +config_host_data.set('CONFIG_DSA_OPT', get_option('enqcmd')) | 22 | +config_host_data.set('CONFIG_DSA_OPT', get_option('enqcmd') \ |
23 | + .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable ENQCMD') \ | ||
24 | + .require(cc.links(''' | ||
25 | + #include <stdint.h> | ||
26 | + #include <cpuid.h> | ||
27 | + #include <immintrin.h> | ||
28 | + static int __attribute__((target("enqcmd"))) bar(void *a) { | ||
29 | + uint64_t dst[8] = { 0 }; | ||
30 | + uint64_t src[8] = { 0 }; | ||
31 | + return _enqcmd(dst, src); | ||
32 | + } | ||
33 | + int main(int argc, char *argv[]) { return bar(argv[argc - 1]); } | ||
34 | + '''), error_message: 'ENQCMD not available').allowed()) | ||
19 | + | 35 | + |
20 | # For both AArch64 and AArch32, detect if builtins are available. | 36 | # For both AArch64 and AArch32, detect if builtins are available. |
21 | config_host_data.set('CONFIG_ARM_AES_BUILTIN', cc.compiles(''' | 37 | config_host_data.set('CONFIG_ARM_AES_BUILTIN', cc.compiles(''' |
22 | #include <arm_neon.h> | 38 | #include <arm_neon.h> |
23 | diff --git a/meson_options.txt b/meson_options.txt | 39 | diff --git a/meson_options.txt b/meson_options.txt |
24 | index XXXXXXX..XXXXXXX 100644 | 40 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/meson_options.txt | 41 | --- a/meson_options.txt |
26 | +++ b/meson_options.txt | 42 | +++ b/meson_options.txt |
27 | @@ -XXX,XX +XXX,XX @@ option('avx512f', type: 'feature', value: 'disabled', | 43 | @@ -XXX,XX +XXX,XX @@ option('avx2', type: 'feature', value: 'auto', |
28 | description: 'AVX512F optimizations') | 44 | description: 'AVX2 optimizations') |
29 | option('avx512bw', type: 'feature', value: 'auto', | 45 | option('avx512bw', type: 'feature', value: 'auto', |
30 | description: 'AVX512BW optimizations') | 46 | description: 'AVX512BW optimizations') |
31 | +option('enqcmd', type: 'boolean', value: false, | 47 | +option('enqcmd', type: 'feature', value: 'disabled', |
32 | + description: 'MENQCMD optimizations') | 48 | + description: 'ENQCMD optimizations') |
33 | option('keyring', type: 'feature', value: 'auto', | 49 | option('keyring', type: 'feature', value: 'auto', |
34 | description: 'Linux keyring support') | 50 | description: 'Linux keyring support') |
35 | option('libkeyutils', type: 'feature', value: 'auto', | 51 | option('libkeyutils', type: 'feature', value: 'auto', |
36 | diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh | 52 | diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh |
37 | index XXXXXXX..XXXXXXX 100644 | 53 | index XXXXXXX..XXXXXXX 100644 |
38 | --- a/scripts/meson-buildoptions.sh | 54 | --- a/scripts/meson-buildoptions.sh |
39 | +++ b/scripts/meson-buildoptions.sh | 55 | +++ b/scripts/meson-buildoptions.sh |
40 | @@ -XXX,XX +XXX,XX @@ meson_options_help() { | 56 | @@ -XXX,XX +XXX,XX @@ meson_options_help() { |
57 | printf "%s\n" ' auth-pam PAM access control' | ||
41 | printf "%s\n" ' avx2 AVX2 optimizations' | 58 | printf "%s\n" ' avx2 AVX2 optimizations' |
42 | printf "%s\n" ' avx512bw AVX512BW optimizations' | 59 | printf "%s\n" ' avx512bw AVX512BW optimizations' |
43 | printf "%s\n" ' avx512f AVX512F optimizations' | ||
44 | + printf "%s\n" ' enqcmd ENQCMD optimizations' | 60 | + printf "%s\n" ' enqcmd ENQCMD optimizations' |
45 | printf "%s\n" ' blkio libblkio block device driver' | 61 | printf "%s\n" ' blkio libblkio block device driver' |
46 | printf "%s\n" ' bochs bochs image format support' | 62 | printf "%s\n" ' bochs bochs image format support' |
47 | printf "%s\n" ' bpf eBPF support' | 63 | printf "%s\n" ' bpf eBPF support' |
48 | @@ -XXX,XX +XXX,XX @@ _meson_option_parse() { | 64 | @@ -XXX,XX +XXX,XX @@ _meson_option_parse() { |
65 | --disable-avx2) printf "%s" -Davx2=disabled ;; | ||
66 | --enable-avx512bw) printf "%s" -Davx512bw=enabled ;; | ||
49 | --disable-avx512bw) printf "%s" -Davx512bw=disabled ;; | 67 | --disable-avx512bw) printf "%s" -Davx512bw=disabled ;; |
50 | --enable-avx512f) printf "%s" -Davx512f=enabled ;; | 68 | + --enable-enqcmd) printf "%s" -Denqcmd=enabled ;; |
51 | --disable-avx512f) printf "%s" -Davx512f=disabled ;; | 69 | + --disable-enqcmd) printf "%s" -Denqcmd=disabled ;; |
52 | + --enable-enqcmd) printf "%s" -Denqcmd=true ;; | ||
53 | + --disable-enqcmd) printf "%s" -Denqcmd=false ;; | ||
54 | --enable-gcov) printf "%s" -Db_coverage=true ;; | 70 | --enable-gcov) printf "%s" -Db_coverage=true ;; |
55 | --disable-gcov) printf "%s" -Db_coverage=false ;; | 71 | --disable-gcov) printf "%s" -Db_coverage=false ;; |
56 | --enable-lto) printf "%s" -Db_lto=true ;; | 72 | --enable-lto) printf "%s" -Db_lto=true ;; |
57 | -- | 73 | -- |
58 | 2.30.2 | 74 | Yichen Wang |
1 | 1. Refactor multifd_send_thread function. | 1 | Signed-off-by: Yichen Wang <yichen.wang@bytedance.com> |
---|---|---|---|
2 | 2. Implement buffer_is_zero_use_cpu to handle CPU based zero page | 2 | Reviewed-by: Fabiano Rosas <farosas@suse.de> |
3 | checking. | 3 | --- |
4 | 3. Introduce the batch task structure in MultiFDSendParams. | 4 | scripts/update-linux-headers.sh | 2 +- |
5 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
5 | 6 | ||
6 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | 7 | diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh |
7 | --- | 8 | index XXXXXXX..XXXXXXX 100755 |
8 | migration/multifd.c | 82 ++++++++++++++++++++++++++++++++++++--------- | 9 | --- a/scripts/update-linux-headers.sh |
9 | migration/multifd.h | 3 ++ | 10 | +++ b/scripts/update-linux-headers.sh |
10 | 2 files changed, 70 insertions(+), 15 deletions(-) | 11 | @@ -XXX,XX +XXX,XX @@ rm -rf "$output/linux-headers/linux" |
11 | 12 | mkdir -p "$output/linux-headers/linux" | |
12 | diff --git a/migration/multifd.c b/migration/multifd.c | 13 | for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ |
13 | index XXXXXXX..XXXXXXX 100644 | 14 | psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ |
14 | --- a/migration/multifd.c | 15 | - vduse.h iommufd.h bits.h; do |
15 | +++ b/migration/multifd.c | 16 | + vduse.h iommufd.h bits.h idxd.h; do |
16 | @@ -XXX,XX +XXX,XX @@ | 17 | cp "$hdrdir/include/linux/$header" "$output/linux-headers/linux" |
17 | #include "qemu/cutils.h" | 18 | done |
18 | #include "qemu/rcu.h" | ||
19 | #include "qemu/cutils.h" | ||
20 | +#include "qemu/dsa.h" | ||
21 | +#include "qemu/memalign.h" | ||
22 | #include "exec/target_page.h" | ||
23 | #include "sysemu/sysemu.h" | ||
24 | #include "exec/ramblock.h" | ||
25 | @@ -XXX,XX +XXX,XX @@ void multifd_save_cleanup(void) | ||
26 | p->name = NULL; | ||
27 | multifd_pages_clear(p->pages); | ||
28 | p->pages = NULL; | ||
29 | + g_free(p->addr); | ||
30 | + p->addr = NULL; | ||
31 | + buffer_zero_batch_task_destroy(p->batch_task); | ||
32 | + qemu_vfree(p->batch_task); | ||
33 | + p->batch_task = NULL; | ||
34 | p->packet_len = 0; | ||
35 | g_free(p->packet); | ||
36 | p->packet = NULL; | ||
37 | @@ -XXX,XX +XXX,XX @@ int multifd_send_sync_main(QEMUFile *f) | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | +static void set_page(MultiFDSendParams *p, bool zero_page, uint64_t offset) | ||
42 | +{ | ||
43 | + RAMBlock *rb = p->pages->block; | ||
44 | + if (zero_page) { | ||
45 | + p->zero[p->zero_num] = offset; | ||
46 | + p->zero_num++; | ||
47 | + ram_release_page(rb->idstr, offset); | ||
48 | + } else { | ||
49 | + p->normal[p->normal_num] = offset; | ||
50 | + p->normal_num++; | ||
51 | + } | ||
52 | +} | ||
53 | + | ||
54 | +static void buffer_is_zero_use_cpu(MultiFDSendParams *p) | ||
55 | +{ | ||
56 | + const void **buf = (const void **)p->addr; | ||
57 | + assert(!migrate_use_main_zero_page()); | ||
58 | + | ||
59 | + for (int i = 0; i < p->pages->num; i++) { | ||
60 | + p->batch_task->results[i] = buffer_is_zero(buf[i], p->page_size); | ||
61 | + } | ||
62 | +} | ||
63 | + | ||
64 | +static void set_normal_pages(MultiFDSendParams *p) | ||
65 | +{ | ||
66 | + for (int i = 0; i < p->pages->num; i++) { | ||
67 | + p->batch_task->results[i] = false; | ||
68 | + } | ||
69 | +} | ||
70 | + | ||
71 | +static void multifd_zero_page_check(MultiFDSendParams *p) | ||
72 | +{ | ||
73 | + /* older qemu don't understand zero page on multifd channel */ | ||
74 | + bool use_multifd_zero_page = !migrate_use_main_zero_page(); | ||
75 | + | ||
76 | + RAMBlock *rb = p->pages->block; | ||
77 | + | ||
78 | + for (int i = 0; i < p->pages->num; i++) { | ||
79 | + p->addr[i] = (ram_addr_t)(rb->host + p->pages->offset[i]); | ||
80 | + } | ||
81 | + | ||
82 | + if (use_multifd_zero_page) { | ||
83 | + buffer_is_zero_use_cpu(p); | ||
84 | + } else { | ||
85 | + // No zero page checking. All pages are normal pages. | ||
86 | + set_normal_pages(p); | ||
87 | + } | ||
88 | + | ||
89 | + for (int i = 0; i < p->pages->num; i++) { | ||
90 | + uint64_t offset = p->pages->offset[i]; | ||
91 | + bool zero_page = p->batch_task->results[i]; | ||
92 | + set_page(p, zero_page, offset); | ||
93 | + } | ||
94 | +} | ||
95 | + | ||
96 | static void *multifd_send_thread(void *opaque) | ||
97 | { | ||
98 | MultiFDSendParams *p = opaque; | ||
99 | MigrationThread *thread = NULL; | ||
100 | Error *local_err = NULL; | ||
101 | - /* qemu older than 8.2 don't understand zero page on multifd channel */ | ||
102 | - bool use_multifd_zero_page = !migrate_use_main_zero_page(); | ||
103 | int ret = 0; | ||
104 | bool use_zero_copy_send = migrate_zero_copy_send(); | ||
105 | |||
106 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
107 | qemu_mutex_lock(&p->mutex); | ||
108 | |||
109 | if (p->pending_job) { | ||
110 | - RAMBlock *rb = p->pages->block; | ||
111 | uint64_t packet_num = p->packet_num; | ||
112 | uint32_t flags; | ||
113 | |||
114 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
115 | p->iovs_num = 1; | ||
116 | } | ||
117 | |||
118 | - for (int i = 0; i < p->pages->num; i++) { | ||
119 | - uint64_t offset = p->pages->offset[i]; | ||
120 | - if (use_multifd_zero_page && | ||
121 | - buffer_is_zero(rb->host + offset, p->page_size)) { | ||
122 | - p->zero[p->zero_num] = offset; | ||
123 | - p->zero_num++; | ||
124 | - ram_release_page(rb->idstr, offset); | ||
125 | - } else { | ||
126 | - p->normal[p->normal_num] = offset; | ||
127 | - p->normal_num++; | ||
128 | - } | ||
129 | - } | ||
130 | + multifd_zero_page_check(p); | ||
131 | |||
132 | if (p->normal_num) { | ||
133 | ret = multifd_send_state->ops->send_prepare(p, &local_err); | ||
134 | @@ -XXX,XX +XXX,XX @@ int multifd_save_setup(Error **errp) | ||
135 | p->pending_job = 0; | ||
136 | p->id = i; | ||
137 | p->pages = multifd_pages_init(page_count); | ||
138 | + p->addr = g_new0(ram_addr_t, page_count); | ||
139 | + p->batch_task = | ||
140 | + (struct buffer_zero_batch_task *)qemu_memalign(64, sizeof(*p->batch_task)); | ||
141 | + buffer_zero_batch_task_init(p->batch_task, page_count); | ||
142 | p->packet_len = sizeof(MultiFDPacket_t) | ||
143 | + sizeof(uint64_t) * page_count; | ||
144 | p->packet = g_malloc0(p->packet_len); | ||
145 | diff --git a/migration/multifd.h b/migration/multifd.h | ||
146 | index XXXXXXX..XXXXXXX 100644 | ||
147 | --- a/migration/multifd.h | ||
148 | +++ b/migration/multifd.h | ||
149 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
150 | * pending_job != 0 -> multifd_channel can use it. | ||
151 | */ | ||
152 | MultiFDPages_t *pages; | ||
153 | + /* Address of each pages in pages */ | ||
154 | + ram_addr_t *addr; | ||
155 | + struct buffer_zero_batch_task *batch_task; | ||
156 | |||
157 | /* thread local variables. No locking required */ | ||
158 | 19 | ||
159 | -- | 20 | -- |
160 | 2.30.2 | 21 | Yichen Wang |
1 | From: Hao Xiang <hao.xiang@linux.dev> | ||
---|---|---|---|
2 | |||
1 | * DSA device open and close. | 3 | * DSA device open and close. |
2 | * DSA group contains multiple DSA devices. | 4 | * DSA group contains multiple DSA devices. |
3 | * DSA group configure/start/stop/clean. | 5 | * DSA group configure/start/stop/clean. |
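
As a rough usage sketch of the lifecycle exposed by include/qemu/dsa.h below
(error handling trimmed; the work queue path and the hand-built strList are
illustrative assumptions, in the real flow they come from the migration
accelerator path parameter):

    #include "qemu/dsa.h"
    #include "qemu/error-report.h"

    static void dsa_lifecycle_sketch(void)
    {
        /* one user-openable DSA work queue; the path is an example only */
        strList path = { .next = NULL, .value = (char *)"/dev/dsa/wq0.0" };
        Error *local_err = NULL;

        if (qemu_dsa_init(&path, &local_err) != 0) {
            error_report_err(local_err);
            return;
        }
        qemu_dsa_start();     /* bring up the device group and completion thread */

        /* ... submit zero page checking tasks here ... */

        qemu_dsa_stop();      /* quiesce the device group */
        qemu_dsa_cleanup();   /* release the resources created by init */
    }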
4 | 6 | ||
5 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | 7 | Signed-off-by: Hao Xiang <hao.xiang@linux.dev> |
6 | Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com> | 8 | Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com> |
9 | Signed-off-by: Yichen Wang <yichen.wang@bytedance.com> | ||
10 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
7 | --- | 11 | --- |
8 | include/qemu/dsa.h | 49 +++++++ | 12 | include/qemu/dsa.h | 99 ++++++++++++++++ |
9 | util/dsa.c | 338 +++++++++++++++++++++++++++++++++++++++++++++ | 13 | util/dsa.c | 280 +++++++++++++++++++++++++++++++++++++++++++++ |
10 | util/meson.build | 1 + | 14 | util/meson.build | 3 + |
11 | 3 files changed, 388 insertions(+) | 15 | 3 files changed, 382 insertions(+) |
12 | create mode 100644 include/qemu/dsa.h | 16 | create mode 100644 include/qemu/dsa.h |
13 | create mode 100644 util/dsa.c | 17 | create mode 100644 util/dsa.c |
14 | 18 | ||
15 | diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h | 19 | diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h |
16 | new file mode 100644 | 20 | new file mode 100644 |
17 | index XXXXXXX..XXXXXXX | 21 | index XXXXXXX..XXXXXXX |
18 | --- /dev/null | 22 | --- /dev/null |
19 | +++ b/include/qemu/dsa.h | 23 | +++ b/include/qemu/dsa.h |
20 | @@ -XXX,XX +XXX,XX @@ | 24 | @@ -XXX,XX +XXX,XX @@ |
25 | +/* | ||
26 | + * Interface for using Intel Data Streaming Accelerator to offload certain | ||
27 | + * background operations. | ||
28 | + * | ||
29 | + * Copyright (C) Bytedance Ltd. | ||
30 | + * | ||
31 | + * Authors: | ||
32 | + * Hao Xiang <hao.xiang@bytedance.com> | ||
33 | + * Yichen Wang <yichen.wang@bytedance.com> | ||
34 | + * | ||
35 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
36 | + * See the COPYING file in the top-level directory. | ||
37 | + */ | ||
38 | + | ||
21 | +#ifndef QEMU_DSA_H | 39 | +#ifndef QEMU_DSA_H |
22 | +#define QEMU_DSA_H | 40 | +#define QEMU_DSA_H |
23 | + | 41 | + |
42 | +#include "qapi/error.h" | ||
24 | +#include "qemu/thread.h" | 43 | +#include "qemu/thread.h" |
25 | +#include "qemu/queue.h" | 44 | +#include "qemu/queue.h" |
26 | + | 45 | + |
27 | +#ifdef CONFIG_DSA_OPT | 46 | +#ifdef CONFIG_DSA_OPT |
28 | + | 47 | + |
29 | +#pragma GCC push_options | 48 | +#pragma GCC push_options |
30 | +#pragma GCC target("enqcmd") | 49 | +#pragma GCC target("enqcmd") |
31 | + | 50 | + |
32 | +#include <linux/idxd.h> | 51 | +#include <linux/idxd.h> |
33 | +#include "x86intrin.h" | 52 | +#include "x86intrin.h" |
34 | + | 53 | + |
54 | +typedef struct { | ||
55 | + void *work_queue; | ||
56 | +} QemuDsaDevice; | ||
57 | + | ||
58 | +typedef QSIMPLEQ_HEAD(QemuDsaTaskQueue, QemuDsaBatchTask) QemuDsaTaskQueue; | ||
59 | + | ||
60 | +typedef struct { | ||
61 | + QemuDsaDevice *dsa_devices; | ||
62 | + int num_dsa_devices; | ||
63 | + /* The index of the next DSA device to be used. */ | ||
64 | + uint32_t device_allocator_index; | ||
65 | + bool running; | ||
66 | + QemuMutex task_queue_lock; | ||
67 | + QemuCond task_queue_cond; | ||
68 | + QemuDsaTaskQueue task_queue; | ||
69 | +} QemuDsaDeviceGroup; | ||
70 | + | ||
71 | +/** | ||
72 | + * @brief Initializes DSA devices. | ||
73 | + * | ||
74 | + * @param dsa_parameter A list of DSA device path from migration parameter. | ||
75 | + * | ||
76 | + * @return int Zero if successful, otherwise non zero. | ||
77 | + */ | ||
78 | +int qemu_dsa_init(const strList *dsa_parameter, Error **errp); | ||
79 | + | ||
80 | +/** | ||
81 | + * @brief Start logic to enable using DSA. | ||
82 | + */ | ||
83 | +void qemu_dsa_start(void); | ||
84 | + | ||
85 | +/** | ||
86 | + * @brief Stop the device group and the completion thread. | ||
87 | + */ | ||
88 | +void qemu_dsa_stop(void); | ||
89 | + | ||
90 | +/** | ||
91 | + * @brief Clean up system resources created for DSA offloading. | ||
92 | + */ | ||
93 | +void qemu_dsa_cleanup(void); | ||
94 | + | ||
95 | +/** | ||
96 | + * @brief Check if DSA is running. | ||
97 | + * | ||
98 | + * @return True if DSA is running, otherwise false. | ||
99 | + */ | ||
100 | +bool qemu_dsa_is_running(void); | ||
101 | + | ||
102 | +#else | ||
103 | + | ||
104 | +static inline bool qemu_dsa_is_running(void) | ||
105 | +{ | ||
106 | + return false; | ||
107 | +} | ||
108 | + | ||
109 | +static inline int qemu_dsa_init(const strList *dsa_parameter, Error **errp) | ||
110 | +{ | ||
111 | + error_setg(errp, "DSA accelerator is not enabled."); | ||
112 | + return -1; | ||
113 | +} | ||
114 | + | ||
115 | +static inline void qemu_dsa_start(void) {} | ||
116 | + | ||
117 | +static inline void qemu_dsa_stop(void) {} | ||
118 | + | ||
119 | +static inline void qemu_dsa_cleanup(void) {} | ||
120 | + | ||
35 | +#endif | 121 | +#endif |
36 | + | 122 | + |
37 | +/** | ||
38 | + * @brief Initializes DSA devices. | ||
39 | + * | ||
40 | + * @param dsa_parameter A list of DSA device path from migration parameter. | ||
41 | + * @return int Zero if successful, otherwise non zero. | ||
42 | + */ | ||
43 | +int dsa_init(const char *dsa_parameter); | ||
44 | + | ||
45 | +/** | ||
46 | + * @brief Start logic to enable using DSA. | ||
47 | + */ | ||
48 | +void dsa_start(void); | ||
49 | + | ||
50 | +/** | ||
51 | + * @brief Stop logic to clean up DSA by halting the device group and cleaning up | ||
52 | + * the completion thread. | ||
53 | + */ | ||
54 | +void dsa_stop(void); | ||
55 | + | ||
56 | +/** | ||
57 | + * @brief Clean up system resources created for DSA offloading. | ||
58 | + * This function is called during QEMU process teardown. | ||
59 | + */ | ||
60 | +void dsa_cleanup(void); | ||
61 | + | ||
62 | +/** | ||
63 | + * @brief Check if DSA is running. | ||
64 | + * | ||
65 | + * @return True if DSA is running, otherwise false. | ||
66 | + */ | ||
67 | +bool dsa_is_running(void); | ||
68 | + | ||
69 | +#endif | 123 | +#endif |
70 | \ No newline at end of file | ||
71 | diff --git a/util/dsa.c b/util/dsa.c | 124 | diff --git a/util/dsa.c b/util/dsa.c |
72 | new file mode 100644 | 125 | new file mode 100644 |
73 | index XXXXXXX..XXXXXXX | 126 | index XXXXXXX..XXXXXXX |
74 | --- /dev/null | 127 | --- /dev/null |
75 | +++ b/util/dsa.c | 128 | +++ b/util/dsa.c |
76 | @@ -XXX,XX +XXX,XX @@ | 129 | @@ -XXX,XX +XXX,XX @@ |
77 | +/* | 130 | +/* |
78 | + * Use Intel Data Streaming Accelerator to offload certain background | 131 | + * Use Intel Data Streaming Accelerator to offload certain background |
79 | + * operations. | 132 | + * operations. |
80 | + * | 133 | + * |
81 | + * Copyright (c) 2023 Hao Xiang <hao.xiang@bytedance.com> | 134 | + * Copyright (C) Bytedance Ltd. |
82 | + * Bryan Zhang <bryan.zhang@bytedance.com> | 135 | + * |
83 | + * | 136 | + * Authors: |
84 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | 137 | + * Hao Xiang <hao.xiang@bytedance.com> |
85 | + * of this software and associated documentation files (the "Software"), to deal | 138 | + * Bryan Zhang <bryan.zhang@bytedance.com> |
86 | + * in the Software without restriction, including without limitation the rights | 139 | + * Yichen Wang <yichen.wang@bytedance.com> |
87 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 140 | + * |
88 | + * copies of the Software, and to permit persons to whom the Software is | 141 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. |
89 | + * furnished to do so, subject to the following conditions: | 142 | + * See the COPYING file in the top-level directory. |
90 | + * | ||
91 | + * The above copyright notice and this permission notice shall be included in | ||
92 | + * all copies or substantial portions of the Software. | ||
93 | + * | ||
94 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
95 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
96 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
97 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
98 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
99 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
100 | + * THE SOFTWARE. | ||
101 | + */ | 143 | + */ |
102 | + | 144 | + |
103 | +#include "qemu/osdep.h" | 145 | +#include "qemu/osdep.h" |
146 | +#include "qapi/error.h" | ||
104 | +#include "qemu/queue.h" | 147 | +#include "qemu/queue.h" |
105 | +#include "qemu/memalign.h" | 148 | +#include "qemu/memalign.h" |
106 | +#include "qemu/lockable.h" | 149 | +#include "qemu/lockable.h" |
107 | +#include "qemu/cutils.h" | 150 | +#include "qemu/cutils.h" |
108 | +#include "qemu/dsa.h" | 151 | +#include "qemu/dsa.h" |
109 | +#include "qemu/bswap.h" | 152 | +#include "qemu/bswap.h" |
110 | +#include "qemu/error-report.h" | 153 | +#include "qemu/error-report.h" |
111 | +#include "qemu/rcu.h" | 154 | +#include "qemu/rcu.h" |
112 | + | 155 | + |
113 | +#ifdef CONFIG_DSA_OPT | ||
114 | + | ||
115 | +#pragma GCC push_options | 156 | +#pragma GCC push_options |
116 | +#pragma GCC target("enqcmd") | 157 | +#pragma GCC target("enqcmd") |
117 | + | 158 | + |
118 | +#include <linux/idxd.h> | 159 | +#include <linux/idxd.h> |
119 | +#include "x86intrin.h" | 160 | +#include "x86intrin.h" |
120 | + | 161 | + |
121 | +#define DSA_WQ_SIZE 4096 | 162 | +#define DSA_WQ_PORTAL_SIZE 4096 |
122 | +#define MAX_DSA_DEVICES 16 | 163 | +#define MAX_DSA_DEVICES 16 |
123 | + | 164 | + |
124 | +typedef QSIMPLEQ_HEAD(dsa_task_queue, buffer_zero_batch_task) dsa_task_queue; | 165 | +uint32_t max_retry_count; |
125 | + | 166 | +static QemuDsaDeviceGroup dsa_group; |
126 | +struct dsa_device { | ||
127 | + void *work_queue; | ||
128 | +}; | ||
129 | + | ||
130 | +struct dsa_device_group { | ||
131 | + struct dsa_device *dsa_devices; | ||
132 | + int num_dsa_devices; | ||
133 | + uint32_t index; | ||
134 | + bool running; | ||
135 | + QemuMutex task_queue_lock; | ||
136 | + QemuCond task_queue_cond; | ||
137 | + dsa_task_queue task_queue; | ||
138 | +}; | ||
139 | + | ||
140 | +uint64_t max_retry_count; | ||
141 | +static struct dsa_device_group dsa_group; | ||
142 | + | 167 | + |
143 | + | 168 | + |
144 | +/** | 169 | +/** |
145 | + * @brief This function opens a DSA device's work queue and | 170 | + * @brief This function opens a DSA device's work queue and |
146 | + * maps the DSA device memory into the current process. | 171 | + * maps the DSA device memory into the current process. |
147 | + * | 172 | + * |
148 | + * @param dsa_wq_path A pointer to the DSA device work queue's file path. | 173 | + * @param dsa_wq_path A pointer to the DSA device work queue's file path. |
149 | + * @return A pointer to the mapped memory. | 174 | + * @return A pointer to the mapped memory, or MAP_FAILED on failure. |
150 | + */ | 175 | + */ |
151 | +static void * | 176 | +static void * |
152 | +map_dsa_device(const char *dsa_wq_path) | 177 | +map_dsa_device(const char *dsa_wq_path) |
153 | +{ | 178 | +{ |
154 | + void *dsa_device; | 179 | + void *dsa_device; |
155 | + int fd; | 180 | + int fd; |
156 | + | 181 | + |
157 | + fd = open(dsa_wq_path, O_RDWR); | 182 | + fd = open(dsa_wq_path, O_RDWR); |
158 | + if (fd < 0) { | 183 | + if (fd < 0) { |
159 | + fprintf(stderr, "open %s failed with errno = %d.\n", | 184 | + error_report("Open %s failed with errno = %d.", |
160 | + dsa_wq_path, errno); | 185 | + dsa_wq_path, errno); |
161 | + return MAP_FAILED; | 186 | + return MAP_FAILED; |
162 | + } | 187 | + } |
163 | + dsa_device = mmap(NULL, DSA_WQ_SIZE, PROT_WRITE, | 188 | + dsa_device = mmap(NULL, DSA_WQ_PORTAL_SIZE, PROT_WRITE, |
164 | + MAP_SHARED | MAP_POPULATE, fd, 0); | 189 | + MAP_SHARED | MAP_POPULATE, fd, 0); |
165 | + close(fd); | 190 | + close(fd); |
166 | + if (dsa_device == MAP_FAILED) { | 191 | + if (dsa_device == MAP_FAILED) { |
167 | + fprintf(stderr, "mmap failed with errno = %d.\n", errno); | 192 | + error_report("mmap failed with errno = %d.", errno); |
168 | + return MAP_FAILED; | 193 | + return MAP_FAILED; |
169 | + } | 194 | + } |
170 | + return dsa_device; | 195 | + return dsa_device; |
171 | +} | 196 | +} |
172 | + | 197 | + |
173 | +/** | 198 | +/** |
174 | + * @brief Initializes a DSA device structure. | 199 | + * @brief Initializes a DSA device structure. |
175 | + * | 200 | + * |
176 | + * @param instance A pointer to the DSA device. | 201 | + * @param instance A pointer to the DSA device. |
177 | + * @param work_queue A pointer to the DSA work queue. | 202 | + * @param work_queue A pointer to the DSA work queue. |
178 | + */ | 203 | + */ |
179 | +static void | 204 | +static void |
180 | +dsa_device_init(struct dsa_device *instance, | 205 | +dsa_device_init(QemuDsaDevice *instance, |
181 | + void *dsa_work_queue) | 206 | + void *dsa_work_queue) |
182 | +{ | 207 | +{ |
183 | + instance->work_queue = dsa_work_queue; | 208 | + instance->work_queue = dsa_work_queue; |
184 | +} | 209 | +} |
185 | + | 210 | + |
186 | +/** | 211 | +/** |
187 | + * @brief Cleans up a DSA device structure. | 212 | + * @brief Cleans up a DSA device structure. |
188 | + * | 213 | + * |
189 | + * @param instance A pointer to the DSA device to cleanup. | 214 | + * @param instance A pointer to the DSA device to cleanup. |
190 | + */ | 215 | + */ |
191 | +static void | 216 | +static void |
192 | +dsa_device_cleanup(struct dsa_device *instance) | 217 | +dsa_device_cleanup(QemuDsaDevice *instance) |
193 | +{ | 218 | +{ |
194 | + if (instance->work_queue != MAP_FAILED) { | 219 | + if (instance->work_queue != MAP_FAILED) { |
195 | + munmap(instance->work_queue, DSA_WQ_SIZE); | 220 | + munmap(instance->work_queue, DSA_WQ_PORTAL_SIZE); |
196 | + } | 221 | + } |
197 | +} | 222 | +} |
198 | + | 223 | + |
199 | +/** | 224 | +/** |
200 | + * @brief Initializes a DSA device group. | 225 | + * @brief Initializes a DSA device group. |
201 | + * | 226 | + * |
202 | + * @param group A pointer to the DSA device group. | 227 | + * @param group A pointer to the DSA device group. |
203 | + * @param num_dsa_devices The number of DSA devices this group will have. | 228 | + * @param dsa_parameter A list of DSA device paths taken from the migration
229 | + * parameter; multiple paths are separated by space characters.
204 | + * | 230 | + * |
205 | + * @return Zero if successful, non-zero otherwise. | 231 | + * @return Zero if successful, non-zero otherwise. |
206 | + */ | 232 | + */ |
207 | +static int | 233 | +static int |
208 | +dsa_device_group_init(struct dsa_device_group *group, | 234 | +dsa_device_group_init(QemuDsaDeviceGroup *group, |
209 | + const char *dsa_parameter) | 235 | + const strList *dsa_parameter, |
210 | +{ | 236 | + Error **errp) |
211 | + if (dsa_parameter == NULL || strlen(dsa_parameter) == 0) { | 237 | +{ |
212 | + return 0; | 238 | + if (dsa_parameter == NULL) { |
239 | + error_setg(errp, "dsa device path is not supplied."); | ||
240 | + return -1; | ||
213 | + } | 241 | + } |
214 | + | 242 | + |
215 | + int ret = 0; | 243 | + int ret = 0; |
216 | + char *local_dsa_parameter = g_strdup(dsa_parameter); | ||
217 | + const char *dsa_path[MAX_DSA_DEVICES]; | 244 | + const char *dsa_path[MAX_DSA_DEVICES]; |
218 | + int num_dsa_devices = 0; | 245 | + int num_dsa_devices = 0; |
219 | + char delim[2] = " "; | 246 | + |
220 | + | 247 | + while (dsa_parameter) { |
221 | + char *current_dsa_path = strtok(local_dsa_parameter, delim); | 248 | + dsa_path[num_dsa_devices++] = dsa_parameter->value; |
222 | + | ||
223 | + while (current_dsa_path != NULL) { | ||
224 | + dsa_path[num_dsa_devices++] = current_dsa_path; | ||
225 | + if (num_dsa_devices == MAX_DSA_DEVICES) { | 249 | + if (num_dsa_devices == MAX_DSA_DEVICES) { |
226 | + break; | 250 | + break; |
227 | + } | 251 | + } |
228 | + current_dsa_path = strtok(NULL, delim); | 252 | + dsa_parameter = dsa_parameter->next; |
229 | + } | 253 | + } |
230 | + | 254 | + |
231 | + group->dsa_devices = | 255 | + group->dsa_devices = |
232 | + malloc(sizeof(struct dsa_device) * num_dsa_devices); | 256 | + g_new0(QemuDsaDevice, num_dsa_devices); |
233 | + group->num_dsa_devices = num_dsa_devices; | 257 | + group->num_dsa_devices = num_dsa_devices; |
234 | + group->index = 0; | 258 | + group->device_allocator_index = 0; |
235 | + | 259 | + |
236 | + group->running = false; | 260 | + group->running = false; |
237 | + qemu_mutex_init(&group->task_queue_lock); | 261 | + qemu_mutex_init(&group->task_queue_lock); |
238 | + qemu_cond_init(&group->task_queue_cond); | 262 | + qemu_cond_init(&group->task_queue_cond); |
239 | + QSIMPLEQ_INIT(&group->task_queue); | 263 | + QSIMPLEQ_INIT(&group->task_queue); |
240 | + | 264 | + |
241 | + void *dsa_wq = MAP_FAILED; | 265 | + void *dsa_wq = MAP_FAILED; |
242 | + for (int i = 0; i < num_dsa_devices; i++) { | 266 | + for (int i = 0; i < num_dsa_devices; i++) { |
243 | + dsa_wq = map_dsa_device(dsa_path[i]); | 267 | + dsa_wq = map_dsa_device(dsa_path[i]); |
244 | + if (dsa_wq == MAP_FAILED) { | 268 | + if (dsa_wq == MAP_FAILED && ret != -1) { |
245 | + fprintf(stderr, "map_dsa_device failed MAP_FAILED, " | 269 | + error_setg(errp, "map_dsa_device failed MAP_FAILED."); |
246 | + "using simulation.\n"); | ||
247 | + ret = -1; | 270 | + ret = -1; |
248 | + goto exit; | ||
249 | + } | 271 | + } |
250 | + dsa_device_init(&dsa_group.dsa_devices[i], dsa_wq); | 272 | + dsa_device_init(&group->dsa_devices[i], dsa_wq); |
251 | + } | 273 | + } |
252 | + | 274 | + |
253 | +exit: | ||
254 | + g_free(local_dsa_parameter); | ||
255 | + return ret; | 275 | + return ret; |
256 | +} | 276 | +} |
257 | + | 277 | + |
258 | +/** | 278 | +/** |
259 | + * @brief Starts a DSA device group. | 279 | + * @brief Starts a DSA device group. |
260 | + * | 280 | + * |
261 | + * @param group A pointer to the DSA device group. | 281 | + * @param group A pointer to the DSA device group. |
262 | + * @param dsa_path An array of DSA device path. | 282 | + */ |
263 | + * @param num_dsa_devices The number of DSA devices in the device group. | 283 | +static void |
264 | + */ | 284 | +dsa_device_group_start(QemuDsaDeviceGroup *group) |
265 | +static void | ||
266 | +dsa_device_group_start(struct dsa_device_group *group) | ||
267 | +{ | 285 | +{ |
268 | + group->running = true; | 286 | + group->running = true; |
269 | +} | 287 | +} |
270 | + | 288 | + |
271 | +/** | 289 | +/** |
272 | + * @brief Stops a DSA device group. | 290 | + * @brief Stops a DSA device group. |
273 | + * | 291 | + * |
274 | + * @param group A pointer to the DSA device group. | 292 | + * @param group A pointer to the DSA device group. |
275 | + */ | 293 | + */ |
276 | +__attribute__((unused)) | 294 | +__attribute__((unused)) |
277 | +static void | 295 | +static void |
278 | +dsa_device_group_stop(struct dsa_device_group *group) | 296 | +dsa_device_group_stop(QemuDsaDeviceGroup *group) |
279 | +{ | 297 | +{ |
280 | + group->running = false; | 298 | + group->running = false; |
281 | +} | 299 | +} |
282 | + | 300 | + |
283 | +/** | 301 | +/** |
284 | + * @brief Cleans up a DSA device group. | 302 | + * @brief Cleans up a DSA device group. |
285 | + * | 303 | + * |
286 | + * @param group A pointer to the DSA device group. | 304 | + * @param group A pointer to the DSA device group. |
287 | + */ | 305 | + */ |
288 | +static void | 306 | +static void |
289 | +dsa_device_group_cleanup(struct dsa_device_group *group) | 307 | +dsa_device_group_cleanup(QemuDsaDeviceGroup *group) |
290 | +{ | 308 | +{ |
291 | + if (!group->dsa_devices) { | 309 | + if (!group->dsa_devices) { |
292 | + return; | 310 | + return; |
293 | + } | 311 | + } |
294 | + for (int i = 0; i < group->num_dsa_devices; i++) { | 312 | + for (int i = 0; i < group->num_dsa_devices; i++) { |
295 | + dsa_device_cleanup(&group->dsa_devices[i]); | 313 | + dsa_device_cleanup(&group->dsa_devices[i]); |
296 | + } | 314 | + } |
297 | + free(group->dsa_devices); | 315 | + g_free(group->dsa_devices); |
298 | + group->dsa_devices = NULL; | 316 | + group->dsa_devices = NULL; |
299 | + | 317 | + |
300 | + qemu_mutex_destroy(&group->task_queue_lock); | 318 | + qemu_mutex_destroy(&group->task_queue_lock); |
301 | + qemu_cond_destroy(&group->task_queue_cond); | 319 | + qemu_cond_destroy(&group->task_queue_cond); |
302 | +} | 320 | +} |
303 | + | 321 | + |
304 | +/** | 322 | +/** |
305 | + * @brief Returns the next available DSA device in the group. | 323 | + * @brief Returns the next available DSA device in the group. |
306 | + * | 324 | + * |
307 | + * @param group A pointer to the DSA device group. | 325 | + * @param group A pointer to the DSA device group. |
308 | + * | 326 | + * |
309 | + * @return struct dsa_device* A pointer to the next available DSA device | 327 | + * @return struct QemuDsaDevice* A pointer to the next available DSA device |
310 | + * in the group. | 328 | + * in the group. |
311 | + */ | 329 | + */ |
312 | +__attribute__((unused)) | 330 | +__attribute__((unused)) |
313 | +static struct dsa_device * | 331 | +static QemuDsaDevice * |
314 | +dsa_device_group_get_next_device(struct dsa_device_group *group) | 332 | +dsa_device_group_get_next_device(QemuDsaDeviceGroup *group) |
315 | +{ | 333 | +{ |
316 | + if (group->num_dsa_devices == 0) { | 334 | + if (group->num_dsa_devices == 0) { |
317 | + return NULL; | 335 | + return NULL; |
318 | + } | 336 | + } |
319 | + uint32_t current = qatomic_fetch_inc(&group->index); | 337 | + uint32_t current = qatomic_fetch_inc(&group->device_allocator_index); |
320 | + current %= group->num_dsa_devices; | 338 | + current %= group->num_dsa_devices; |
321 | + return &group->dsa_devices[current]; | 339 | + return &group->dsa_devices[current]; |
322 | +} | 340 | +} |
323 | + | 341 | + |
324 | +/** | 342 | +/** |
325 | + * @brief Check if DSA is running. | 343 | + * @brief Check if DSA is running. |
326 | + * | 344 | + * |
327 | + * @return True if DSA is running, otherwise false. | 345 | + * @return True if DSA is running, otherwise false. |
328 | + */ | 346 | + */ |
329 | +bool dsa_is_running(void) | 347 | +bool qemu_dsa_is_running(void) |
330 | +{ | 348 | +{ |
331 | + return false; | 349 | + return false; |
332 | +} | 350 | +} |
333 | + | 351 | + |
334 | +static void | 352 | +static void |
335 | +dsa_globals_init(void) | 353 | +dsa_globals_init(void) |
336 | +{ | 354 | +{ |
337 | + max_retry_count = UINT64_MAX; | 355 | + max_retry_count = UINT32_MAX; |
338 | +} | 356 | +} |
339 | + | 357 | + |
340 | +/** | 358 | +/** |
341 | + * @brief Initializes DSA devices. | 359 | + * @brief Initializes DSA devices. |
342 | + * | 360 | + * |
343 | + * @param dsa_parameter A list of DSA device path from migration parameter. | 361 | + * @param dsa_parameter A list of DSA device path from migration parameter. |
362 | + * | ||
344 | + * @return int Zero if successful, otherwise non zero. | 363 | + * @return int Zero if successful, otherwise non zero. |
345 | + */ | 364 | + */ |
346 | +int dsa_init(const char *dsa_parameter) | 365 | +int qemu_dsa_init(const strList *dsa_parameter, Error **errp) |
347 | +{ | 366 | +{ |
348 | + dsa_globals_init(); | 367 | + dsa_globals_init(); |
349 | + | 368 | + |
350 | + return dsa_device_group_init(&dsa_group, dsa_parameter); | 369 | + return dsa_device_group_init(&dsa_group, dsa_parameter, errp); |
351 | +} | 370 | +} |
352 | + | 371 | + |
353 | +/** | 372 | +/** |
354 | + * @brief Start logic to enable using DSA. | 373 | + * @brief Start logic to enable using DSA. |
355 | + * | 374 | + * |
356 | + */ | 375 | + */ |
357 | +void dsa_start(void) | 376 | +void qemu_dsa_start(void) |
358 | +{ | 377 | +{ |
359 | + if (dsa_group.num_dsa_devices == 0) { | 378 | + if (dsa_group.num_dsa_devices == 0) { |
360 | + return; | 379 | + return; |
361 | + } | 380 | + } |
362 | + if (dsa_group.running) { | 381 | + if (dsa_group.running) { |
363 | + return; | 382 | + return; |
364 | + } | 383 | + } |
365 | + dsa_device_group_start(&dsa_group); | 384 | + dsa_device_group_start(&dsa_group); |
366 | +} | 385 | +} |
367 | + | 386 | + |
368 | +/** | 387 | +/** |
369 | + * @brief Stop logic to clean up DSA by halting the device group and cleaning up | 388 | + * @brief Stop the device group and the completion thread. |
370 | + * the completion thread. | 389 | + * |
371 | + * | 390 | + */ |
372 | + */ | 391 | +void qemu_dsa_stop(void) |
373 | +void dsa_stop(void) | 392 | +{ |
374 | +{ | 393 | + QemuDsaDeviceGroup *group = &dsa_group; |
375 | + struct dsa_device_group *group = &dsa_group; | ||
376 | + | 394 | + |
377 | + if (!group->running) { | 395 | + if (!group->running) { |
378 | + return; | 396 | + return; |
379 | + } | 397 | + } |
380 | +} | 398 | +} |
381 | + | 399 | + |
382 | +/** | 400 | +/** |
383 | + * @brief Clean up system resources created for DSA offloading. | 401 | + * @brief Clean up system resources created for DSA offloading. |
384 | + * This function is called during QEMU process teardown. | 402 | + * |
385 | + * | 403 | + */ |
386 | + */ | 404 | +void qemu_dsa_cleanup(void) |
387 | +void dsa_cleanup(void) | 405 | +{ |
388 | +{ | 406 | + qemu_dsa_stop(); |
389 | + dsa_stop(); | ||
390 | + dsa_device_group_cleanup(&dsa_group); | 407 | + dsa_device_group_cleanup(&dsa_group); |
391 | +} | 408 | +} |
392 | + | ||
393 | +#else | ||
394 | + | ||
395 | +bool dsa_is_running(void) | ||
396 | +{ | ||
397 | + return false; | ||
398 | +} | ||
399 | + | ||
400 | +int dsa_init(const char *dsa_parameter) | ||
401 | +{ | ||
402 | + fprintf(stderr, "Intel Data Streaming Accelerator is not supported " | ||
403 | + "on this platform.\n"); | ||
404 | + return -1; | ||
405 | +} | ||
406 | + | ||
407 | +void dsa_start(void) {} | ||
408 | + | ||
409 | +void dsa_stop(void) {} | ||
410 | + | ||
411 | +void dsa_cleanup(void) {} | ||
412 | + | ||
413 | +#endif | ||
414 | + | 409 | + |
415 | diff --git a/util/meson.build b/util/meson.build | 410 | diff --git a/util/meson.build b/util/meson.build |
416 | index XXXXXXX..XXXXXXX 100644 | 411 | index XXXXXXX..XXXXXXX 100644 |
417 | --- a/util/meson.build | 412 | --- a/util/meson.build |
418 | +++ b/util/meson.build | 413 | +++ b/util/meson.build |
419 | @@ -XXX,XX +XXX,XX @@ if have_block or have_ga | 414 | @@ -XXX,XX +XXX,XX @@ if cpu == 'aarch64' |
420 | endif | 415 | util_ss.add(files('cpuinfo-aarch64.c')) |
421 | if have_block | 416 | elif cpu in ['x86', 'x86_64'] |
422 | util_ss.add(files('aio-wait.c')) | 417 | util_ss.add(files('cpuinfo-i386.c')) |
423 | + util_ss.add(files('dsa.c')) | 418 | + if config_host_data.get('CONFIG_DSA_OPT') |
424 | util_ss.add(files('buffer.c')) | 419 | + util_ss.add(files('dsa.c')) |
425 | util_ss.add(files('bufferiszero.c')) | 420 | + endif |
426 | util_ss.add(files('hbitmap.c')) | 421 | elif cpu == 'loongarch64' |
422 | util_ss.add(files('cpuinfo-loongarch.c')) | ||
423 | elif cpu in ['ppc', 'ppc64'] | ||
427 | -- | 424 | -- |
428 | 2.30.2 | 425 | Yichen Wang | diff view generated by jsdifflib |
1 | From: Hao Xiang <hao.xiang@linux.dev> | ||
---|---|---|---|
2 | |||
1 | * Use a thread-safe queue for DSA task enqueue/dequeue. | 3 | * Use a thread-safe queue for DSA task enqueue/dequeue.
2 | * Implement DSA task submission. | 4 | * Implement DSA task submission. |
3 | * Implement DSA batch task submission. | 5 | * Implement DSA batch task submission. |
4 | 6 | ||
5 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | 7 | Signed-off-by: Hao Xiang <hao.xiang@linux.dev> |
8 | Signed-off-by: Yichen Wang <yichen.wang@bytedance.com> | ||
9 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
6 | --- | 10 | --- |
7 | include/qemu/dsa.h | 35 ++++++++ | 11 | include/qemu/dsa.h | 29 +++++++ |
8 | util/dsa.c | 196 +++++++++++++++++++++++++++++++++++++++++++++ | 12 | util/dsa.c | 186 ++++++++++++++++++++++++++++++++++++++++++++- |
9 | 2 files changed, 231 insertions(+) | 13 | 2 files changed, 214 insertions(+), 1 deletion(-) |
10 | 14 | ||
11 | diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h | 15 | diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h |
12 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/include/qemu/dsa.h | 17 | --- a/include/qemu/dsa.h |
14 | +++ b/include/qemu/dsa.h | 18 | +++ b/include/qemu/dsa.h |
15 | @@ -XXX,XX +XXX,XX @@ | 19 | @@ -XXX,XX +XXX,XX @@ |
16 | #include <linux/idxd.h> | 20 | #include <linux/idxd.h> |
17 | #include "x86intrin.h" | 21 | #include "x86intrin.h" |
18 | 22 | ||
19 | +enum dsa_task_type { | 23 | +typedef enum QemuDsaTaskType { |
20 | + DSA_TASK = 0, | 24 | + QEMU_DSA_TASK = 0, |
21 | + DSA_BATCH_TASK | 25 | + QEMU_DSA_BATCH_TASK |
22 | +}; | 26 | +} QemuDsaTaskType; |
23 | + | 27 | + |
24 | +enum dsa_task_status { | 28 | +typedef enum QemuDsaTaskStatus { |
25 | + DSA_TASK_READY = 0, | 29 | + QEMU_DSA_TASK_READY = 0, |
26 | + DSA_TASK_PROCESSING, | 30 | + QEMU_DSA_TASK_PROCESSING, |
27 | + DSA_TASK_COMPLETION | 31 | + QEMU_DSA_TASK_COMPLETION |
28 | +}; | 32 | +} QemuDsaTaskStatus; |
29 | + | 33 | + |
30 | +typedef void (*buffer_zero_dsa_completion_fn)(void *); | 34 | typedef struct { |
31 | + | 35 | void *work_queue; |
32 | +typedef struct buffer_zero_batch_task { | 36 | } QemuDsaDevice; |
37 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
38 | QemuDsaTaskQueue task_queue; | ||
39 | } QemuDsaDeviceGroup; | ||
40 | |||
41 | +typedef void (*qemu_dsa_completion_fn)(void *); | ||
42 | + | ||
43 | +typedef struct QemuDsaBatchTask { | ||
33 | + struct dsa_hw_desc batch_descriptor; | 44 | + struct dsa_hw_desc batch_descriptor; |
34 | + struct dsa_hw_desc *descriptors; | 45 | + struct dsa_hw_desc *descriptors; |
35 | + struct dsa_completion_record batch_completion __attribute__((aligned(32))); | 46 | + struct dsa_completion_record batch_completion __attribute__((aligned(32))); |
36 | + struct dsa_completion_record *completions; | 47 | + struct dsa_completion_record *completions; |
37 | + struct dsa_device_group *group; | 48 | + QemuDsaDeviceGroup *group; |
38 | + struct dsa_device *device; | 49 | + QemuDsaDevice *device; |
39 | + buffer_zero_dsa_completion_fn completion_callback; | 50 | + qemu_dsa_completion_fn completion_callback; |
40 | + QemuSemaphore sem_task_complete; | 51 | + QemuSemaphore sem_task_complete; |
41 | + enum dsa_task_type task_type; | 52 | + QemuDsaTaskType task_type; |
42 | + enum dsa_task_status status; | 53 | + QemuDsaTaskStatus status; |
43 | + bool *results; | ||
44 | + int batch_size; | 54 | + int batch_size; |
45 | + QSIMPLEQ_ENTRY(buffer_zero_batch_task) entry; | 55 | + QSIMPLEQ_ENTRY(QemuDsaBatchTask) entry; |
46 | +} buffer_zero_batch_task; | 56 | +} QemuDsaBatchTask; |
47 | + | 57 | + |
48 | +#else | 58 | + |
49 | + | 59 | /** |
50 | +struct buffer_zero_batch_task { | 60 | * @brief Initializes DSA devices. |
51 | + bool *results; | 61 | * |
52 | +}; | ||
53 | + | ||
54 | #endif | ||
55 | |||
56 | /** | ||
57 | diff --git a/util/dsa.c b/util/dsa.c | 62 | diff --git a/util/dsa.c b/util/dsa.c |
58 | index XXXXXXX..XXXXXXX 100644 | 63 | index XXXXXXX..XXXXXXX 100644 |
59 | --- a/util/dsa.c | 64 | --- a/util/dsa.c |
60 | +++ b/util/dsa.c | 65 | +++ b/util/dsa.c |
61 | @@ -XXX,XX +XXX,XX @@ dsa_device_group_get_next_device(struct dsa_device_group *group) | 66 | @@ -XXX,XX +XXX,XX @@ |
67 | #include "x86intrin.h" | ||
68 | |||
69 | #define DSA_WQ_PORTAL_SIZE 4096 | ||
70 | +#define DSA_WQ_DEPTH 128 | ||
71 | #define MAX_DSA_DEVICES 16 | ||
72 | |||
73 | uint32_t max_retry_count; | ||
74 | @@ -XXX,XX +XXX,XX @@ dsa_device_group_get_next_device(QemuDsaDeviceGroup *group) | ||
62 | return &group->dsa_devices[current]; | 75 | return &group->dsa_devices[current]; |
63 | } | 76 | } |
64 | 77 | ||
65 | +/** | 78 | +/** |
66 | + * @brief Empties out the DSA task queue. | 79 | + * @brief Empties out the DSA task queue. |
67 | + * | 80 | + * |
68 | + * @param group A pointer to the DSA device group. | 81 | + * @param group A pointer to the DSA device group. |
69 | + */ | 82 | + */ |
70 | +static void | 83 | +static void |
71 | +dsa_empty_task_queue(struct dsa_device_group *group) | 84 | +dsa_empty_task_queue(QemuDsaDeviceGroup *group) |
72 | +{ | 85 | +{ |
73 | + qemu_mutex_lock(&group->task_queue_lock); | 86 | + qemu_mutex_lock(&group->task_queue_lock); |
74 | + dsa_task_queue *task_queue = &group->task_queue; | 87 | + QemuDsaTaskQueue *task_queue = &group->task_queue; |
75 | + while (!QSIMPLEQ_EMPTY(task_queue)) { | 88 | + while (!QSIMPLEQ_EMPTY(task_queue)) { |
76 | + QSIMPLEQ_REMOVE_HEAD(task_queue, entry); | 89 | + QSIMPLEQ_REMOVE_HEAD(task_queue, entry); |
77 | + } | 90 | + } |
78 | + qemu_mutex_unlock(&group->task_queue_lock); | 91 | + qemu_mutex_unlock(&group->task_queue_lock); |
79 | +} | 92 | +} |
80 | + | 93 | + |
81 | +/** | 94 | +/** |
82 | + * @brief Adds a task to the DSA task queue. | 95 | + * @brief Adds a task to the DSA task queue. |
83 | + * | 96 | + * |
84 | + * @param group A pointer to the DSA device group. | 97 | + * @param group A pointer to the DSA device group. |
85 | + * @param context A pointer to the DSA task to enqueue. | 98 | + * @param task A pointer to the DSA task to enqueue. |
86 | + * | 99 | + * |
87 | + * @return int Zero if successful, otherwise a proper error code. | 100 | + * @return int Zero if successful, otherwise a proper error code. |
88 | + */ | 101 | + */ |
89 | +static int | 102 | +static int |
90 | +dsa_task_enqueue(struct dsa_device_group *group, | 103 | +dsa_task_enqueue(QemuDsaDeviceGroup *group, |
91 | + struct buffer_zero_batch_task *task) | 104 | + QemuDsaBatchTask *task) |
92 | +{ | 105 | +{ |
93 | + dsa_task_queue *task_queue = &group->task_queue; | ||
94 | + QemuMutex *task_queue_lock = &group->task_queue_lock; | ||
95 | + QemuCond *task_queue_cond = &group->task_queue_cond; | ||
96 | + | ||
97 | + bool notify = false; | 106 | + bool notify = false; |
98 | + | 107 | + |
99 | + qemu_mutex_lock(task_queue_lock); | 108 | + qemu_mutex_lock(&group->task_queue_lock); |
100 | + | 109 | + |
101 | + if (!group->running) { | 110 | + if (!group->running) { |
102 | + fprintf(stderr, "DSA: Tried to queue task to stopped device queue\n"); | 111 | + error_report("DSA: Tried to queue task to stopped device queue."); |
103 | + qemu_mutex_unlock(task_queue_lock); | 112 | + qemu_mutex_unlock(&group->task_queue_lock); |
104 | + return -1; | 113 | + return -1; |
105 | + } | 114 | + } |
106 | + | 115 | + |
107 | + // The queue is empty. This enqueue operation is a 0->1 transition. | 116 | + /* The queue is empty. This enqueue operation is a 0->1 transition. */ |
108 | + if (QSIMPLEQ_EMPTY(task_queue)) | 117 | + if (QSIMPLEQ_EMPTY(&group->task_queue)) { |
109 | + notify = true; | 118 | + notify = true; |
110 | + | 119 | + } |
111 | + QSIMPLEQ_INSERT_TAIL(task_queue, task, entry); | 120 | + |
112 | + | 121 | + QSIMPLEQ_INSERT_TAIL(&group->task_queue, task, entry); |
113 | + // We need to notify the waiter for 0->1 transitions. | 122 | + |
114 | + if (notify) | 123 | + /* We need to notify the waiter for 0->1 transitions. */ |
115 | + qemu_cond_signal(task_queue_cond); | 124 | + if (notify) { |
116 | + | 125 | + qemu_cond_signal(&group->task_queue_cond); |
117 | + qemu_mutex_unlock(task_queue_lock); | 126 | + } |
127 | + | ||
128 | + qemu_mutex_unlock(&group->task_queue_lock); | ||
118 | + | 129 | + |
119 | + return 0; | 130 | + return 0; |
120 | +} | 131 | +} |
121 | + | 132 | + |
122 | +/** | 133 | +/** |
123 | + * @brief Takes a DSA task out of the task queue. | 134 | + * @brief Takes a DSA task out of the task queue. |
124 | + * | 135 | + * |
125 | + * @param group A pointer to the DSA device group. | 136 | + * @param group A pointer to the DSA device group. |
126 | + * @return buffer_zero_batch_task* The DSA task being dequeued. | 137 | + * @return QemuDsaBatchTask* The DSA task being dequeued. |
127 | + */ | 138 | + */ |
128 | +__attribute__((unused)) | 139 | +__attribute__((unused)) |
129 | +static struct buffer_zero_batch_task * | 140 | +static QemuDsaBatchTask * |
130 | +dsa_task_dequeue(struct dsa_device_group *group) | 141 | +dsa_task_dequeue(QemuDsaDeviceGroup *group) |
131 | +{ | 142 | +{ |
132 | + struct buffer_zero_batch_task *task = NULL; | 143 | + QemuDsaBatchTask *task = NULL; |
133 | + dsa_task_queue *task_queue = &group->task_queue; | 144 | + |
134 | + QemuMutex *task_queue_lock = &group->task_queue_lock; | 145 | + qemu_mutex_lock(&group->task_queue_lock); |
135 | + QemuCond *task_queue_cond = &group->task_queue_cond; | ||
136 | + | ||
137 | + qemu_mutex_lock(task_queue_lock); | ||
138 | + | 146 | + |
139 | + while (true) { | 147 | + while (true) { |
140 | + if (!group->running) | 148 | + if (!group->running) { |
141 | + goto exit; | 149 | + goto exit; |
142 | + task = QSIMPLEQ_FIRST(task_queue); | 150 | + } |
151 | + task = QSIMPLEQ_FIRST(&group->task_queue); | ||
143 | + if (task != NULL) { | 152 | + if (task != NULL) { |
144 | + break; | 153 | + break; |
145 | + } | 154 | + } |
146 | + qemu_cond_wait(task_queue_cond, task_queue_lock); | 155 | + qemu_cond_wait(&group->task_queue_cond, &group->task_queue_lock); |
147 | + } | 156 | + } |
148 | + | 157 | + |
149 | + QSIMPLEQ_REMOVE_HEAD(task_queue, entry); | 158 | + QSIMPLEQ_REMOVE_HEAD(&group->task_queue, entry); |
150 | + | 159 | + |
151 | +exit: | 160 | +exit: |
152 | + qemu_mutex_unlock(task_queue_lock); | 161 | + qemu_mutex_unlock(&group->task_queue_lock); |
153 | + return task; | 162 | + return task; |
154 | +} | 163 | +} |
155 | + | 164 | + |
156 | +/** | 165 | +/** |
157 | + * @brief Submits a DSA work item to the device work queue. | 166 | + * @brief Submits a DSA work item to the device work queue. |
... | ... | ||
162 | + * @return Zero if successful, non-zero otherwise. | 171 | + * @return Zero if successful, non-zero otherwise. |
163 | + */ | 172 | + */ |
164 | +static int | 173 | +static int |
165 | +submit_wi_int(void *wq, struct dsa_hw_desc *descriptor) | 174 | +submit_wi_int(void *wq, struct dsa_hw_desc *descriptor) |
166 | +{ | 175 | +{ |
167 | + uint64_t retry = 0; | 176 | + uint32_t retry = 0; |
168 | + | 177 | + |
169 | + _mm_sfence(); | 178 | + _mm_sfence(); |
170 | + | 179 | + |
171 | + while (true) { | 180 | + while (true) { |
172 | + if (_enqcmd(wq, descriptor) == 0) { | 181 | + if (_enqcmd(wq, descriptor) == 0) { |
173 | + break; | 182 | + break; |
174 | + } | 183 | + } |
175 | + retry++; | 184 | + retry++; |
176 | + if (retry > max_retry_count) { | 185 | + if (retry > max_retry_count) { |
177 | + fprintf(stderr, "Submit work retry %lu times.\n", retry); | 186 | + error_report("Submit work retry %u times.", retry); |
178 | + exit(1); | 187 | + return -1; |
179 | + } | 188 | + } |
180 | + } | 189 | + } |
181 | + | 190 | + |
182 | + return 0; | 191 | + return 0; |
183 | +} | ||
184 | + | ||
185 | +/** | ||
186 | + * @brief Synchronously submits a DSA work item to the | ||
187 | + * device work queue. | ||
188 | + * | ||
189 | + * @param wq A pointer to the DSA worjk queue's device memory. | ||
190 | + * @param descriptor A pointer to the DSA work item descriptor. | ||
191 | + * | ||
192 | + * @return int Zero if successful, non-zero otherwise. | ||
193 | + */ | ||
194 | +__attribute__((unused)) | ||
195 | +static int | ||
196 | +submit_wi(void *wq, struct dsa_hw_desc *descriptor) | ||
197 | +{ | ||
198 | + return submit_wi_int(wq, descriptor); | ||
199 | +} | 192 | +} |
200 | + | 193 | + |
201 | +/** | 194 | +/** |
202 | + * @brief Asynchronously submits a DSA work item to the | 195 | + * @brief Asynchronously submits a DSA work item to the |
203 | + * device work queue. | 196 | + * device work queue. |
204 | + * | 197 | + * |
205 | + * @param task A pointer to the buffer zero task. | 198 | + * @param task A pointer to the task. |
206 | + * | 199 | + * |
207 | + * @return int Zero if successful, non-zero otherwise. | 200 | + * @return int Zero if successful, non-zero otherwise. |
208 | + */ | 201 | + */ |
209 | +__attribute__((unused)) | 202 | +__attribute__((unused)) |
210 | +static int | 203 | +static int |
211 | +submit_wi_async(struct buffer_zero_batch_task *task) | 204 | +submit_wi_async(QemuDsaBatchTask *task) |
212 | +{ | 205 | +{ |
213 | + struct dsa_device_group *device_group = task->group; | 206 | + QemuDsaDeviceGroup *device_group = task->group; |
214 | + struct dsa_device *device_instance = task->device; | 207 | + QemuDsaDevice *device_instance = task->device; |
215 | + int ret; | 208 | + int ret; |
216 | + | 209 | + |
217 | + assert(task->task_type == DSA_TASK); | 210 | + assert(task->task_type == QEMU_DSA_TASK); |
218 | + | 211 | + |
219 | + task->status = DSA_TASK_PROCESSING; | 212 | + task->status = QEMU_DSA_TASK_PROCESSING; |
220 | + | 213 | + |
221 | + ret = submit_wi_int(device_instance->work_queue, | 214 | + ret = submit_wi_int(device_instance->work_queue, |
222 | + &task->descriptors[0]); | 215 | + &task->descriptors[0]); |
223 | + if (ret != 0) | 216 | + if (ret != 0) { |
224 | + return ret; | 217 | + return ret; |
218 | + } | ||
225 | + | 219 | + |
226 | + return dsa_task_enqueue(device_group, task); | 220 | + return dsa_task_enqueue(device_group, task); |
227 | +} | 221 | +} |
228 | + | 222 | + |
229 | +/** | 223 | +/** |
230 | + * @brief Asynchronously submits a DSA batch work item to the | 224 | + * @brief Asynchronously submits a DSA batch work item to the |
231 | + * device work queue. | 225 | + * device work queue. |
232 | + * | 226 | + * |
233 | + * @param batch_task A pointer to the batch buffer zero task. | 227 | + * @param batch_task A pointer to the batch task. |
234 | + * | 228 | + * |
235 | + * @return int Zero if successful, non-zero otherwise. | 229 | + * @return int Zero if successful, non-zero otherwise. |
236 | + */ | 230 | + */ |
237 | +__attribute__((unused)) | 231 | +__attribute__((unused)) |
238 | +static int | 232 | +static int |
239 | +submit_batch_wi_async(struct buffer_zero_batch_task *batch_task) | 233 | +submit_batch_wi_async(QemuDsaBatchTask *batch_task) |
240 | +{ | 234 | +{ |
241 | + struct dsa_device_group *device_group = batch_task->group; | 235 | + QemuDsaDeviceGroup *device_group = batch_task->group; |
242 | + struct dsa_device *device_instance = batch_task->device; | 236 | + QemuDsaDevice *device_instance = batch_task->device; |
243 | + int ret; | 237 | + int ret; |
244 | + | 238 | + |
245 | + assert(batch_task->task_type == DSA_BATCH_TASK); | 239 | + assert(batch_task->task_type == QEMU_DSA_BATCH_TASK); |
246 | + assert(batch_task->batch_descriptor.desc_count <= batch_task->batch_size); | 240 | + assert(batch_task->batch_descriptor.desc_count <= batch_task->batch_size); |
247 | + assert(batch_task->status == DSA_TASK_READY); | 241 | + assert(batch_task->status == QEMU_DSA_TASK_READY); |
248 | + | 242 | + |
249 | + batch_task->status = DSA_TASK_PROCESSING; | 243 | + batch_task->status = QEMU_DSA_TASK_PROCESSING; |
250 | + | 244 | + |
251 | + ret = submit_wi_int(device_instance->work_queue, | 245 | + ret = submit_wi_int(device_instance->work_queue, |
252 | + &batch_task->batch_descriptor); | 246 | + &batch_task->batch_descriptor); |
253 | + if (ret != 0) | 247 | + if (ret != 0) { |
254 | + return ret; | 248 | + return ret; |
249 | + } | ||
255 | + | 250 | + |
256 | + return dsa_task_enqueue(device_group, batch_task); | 251 | + return dsa_task_enqueue(device_group, batch_task); |
257 | +} | 252 | +} |
258 | + | 253 | + |
259 | /** | 254 | /** |
260 | * @brief Check if DSA is running. | 255 | * @brief Check if DSA is running. |
261 | * | 256 | * |
262 | @@ -XXX,XX +XXX,XX @@ void dsa_stop(void) | 257 | @@ -XXX,XX +XXX,XX @@ bool qemu_dsa_is_running(void) |
258 | static void | ||
259 | dsa_globals_init(void) | ||
260 | { | ||
261 | - max_retry_count = UINT32_MAX; | ||
262 | + /* | ||
263 | + * This value follows a reference example by Intel. The POLL_RETRY_MAX is | ||
264 | + * defined to 10000, so here we used the max WQ depth * 100 for the the max | ||
265 | + * polling retry count. | ||
266 | + */ | ||
267 | + max_retry_count = DSA_WQ_DEPTH * 100; | ||
268 | } | ||
269 | |||
270 | /** | ||
271 | @@ -XXX,XX +XXX,XX @@ void qemu_dsa_stop(void) | ||
263 | if (!group->running) { | 272 | if (!group->running) { |
264 | return; | 273 | return; |
265 | } | 274 | } |
266 | + | 275 | + |
267 | + dsa_empty_task_queue(group); | 276 | + dsa_empty_task_queue(group); |
268 | } | 277 | } |
269 | 278 | ||
270 | /** | 279 | /** |
271 | -- | 280 | -- |
272 | 2.30.2 | 281 | Yichen Wang | diff view generated by jsdifflib |
1 | From: Hao Xiang <hao.xiang@linux.dev> | ||
---|---|---|---|
2 | |||
1 | * Create a dedicated thread for DSA task completion. | 3 | * Create a dedicated thread for DSA task completion. |
2 | * DSA completion thread runs a loop and polls for completed tasks. | 4 | * DSA completion thread runs a loop and polls for completed tasks.
3 | * Start and stop the DSA completion thread during DSA device start/stop. | 5 | * Start and stop the DSA completion thread during DSA device start/stop.
4 | 6 | ||
5 | A user space application can directly submit tasks to the Intel DSA | 7 | A user space application can directly submit tasks to the Intel DSA
6 | accelerator by writing to DSA's device memory (mapped in user space). | 8 | accelerator by writing to DSA's device memory (mapped in user space). |
7 | Once a task is submitted, the device starts processing it and writes | 9 | Once a task is submitted, the device starts processing it and writes
8 | the completion status back to the task. A user space application can | 10 | the completion status back to the task. A user space application can |
9 | poll the task's completion status to check for completion. This change | 11 | poll the task's completion status to check for completion. This change |
10 | uses a dedicated thread to perform DSA task completion checking. | 12 | uses a dedicated thread to perform DSA task completion checking. |
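
For reference, below is a minimal user-space sketch (not part of this patch) of the
submit-and-poll flow described above. It assumes a shared DSA work queue exposed at
the hypothetical path /dev/dsa/wq0.0, a kernel that ships <linux/idxd.h>, and a
compiler with ENQCMD support (e.g. gcc -menqcmd sketch.c); the patch below wraps this
same flow in a task queue and a dedicated completion thread instead of polling inline.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <linux/idxd.h>
#include <x86intrin.h>

int main(void)
{
    static char src[4096] __attribute__((aligned(4096)));
    struct dsa_completion_record comp __attribute__((aligned(32)));
    struct dsa_hw_desc desc;
    volatile uint8_t *status = &comp.status;

    int fd = open("/dev/dsa/wq0.0", O_RDWR);   /* hypothetical WQ path */
    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* Map the WQ portal; writing descriptors to it submits work. */
    void *wq = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0);
    close(fd);
    if (wq == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    /* One "compare buffer against a pattern" (zero check) descriptor. */
    memset(src, 0, sizeof(src));
    memset(&comp, 0, sizeof(comp));
    memset(&desc, 0, sizeof(desc));
    desc.opcode = DSA_OPCODE_COMPVAL;
    desc.flags = IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CRAV;
    desc.src_addr = (uintptr_t)src;
    desc.xfer_size = sizeof(src);
    desc.comp_pattern = 0;
    desc.completion_addr = (uintptr_t)&comp;

    /* Submit the task by writing the descriptor to the mapped portal. */
    _mm_sfence();
    if (_enqcmd(wq, &desc)) {
        fprintf(stderr, "work queue full, a real caller would retry\n");
        return 1;
    }

    /* Poll: the device writes its completion status back to memory. */
    while (*status == DSA_COMP_NONE) {
        _mm_pause();
    }
    printf("status=%u result=%u (result == 0 means the buffer is all zero)\n",
           comp.status, comp.result);

    munmap(wq, 4096);
    return 0;
}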
11 | 13 | ||
12 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | 14 | Signed-off-by: Hao Xiang <hao.xiang@linux.dev> |
15 | Signed-off-by: Yichen Wang <yichen.wang@bytedance.com> | ||
16 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
13 | --- | 17 | --- |
14 | util/dsa.c | 243 ++++++++++++++++++++++++++++++++++++++++++++++++++++- | 18 | include/qemu/dsa.h | 1 + |
15 | 1 file changed, 242 insertions(+), 1 deletion(-) | 19 | util/dsa.c | 272 ++++++++++++++++++++++++++++++++++++++++++++- |
20 | 2 files changed, 272 insertions(+), 1 deletion(-) | ||
16 | 21 | ||
22 | diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/include/qemu/dsa.h | ||
25 | +++ b/include/qemu/dsa.h | ||
26 | @@ -XXX,XX +XXX,XX @@ typedef struct QemuDsaBatchTask { | ||
27 | QemuDsaTaskType task_type; | ||
28 | QemuDsaTaskStatus status; | ||
29 | int batch_size; | ||
30 | + bool *results; | ||
31 | QSIMPLEQ_ENTRY(QemuDsaBatchTask) entry; | ||
32 | } QemuDsaBatchTask; | ||
33 | |||
17 | diff --git a/util/dsa.c b/util/dsa.c | 34 | diff --git a/util/dsa.c b/util/dsa.c |
18 | index XXXXXXX..XXXXXXX 100644 | 35 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/util/dsa.c | 36 | --- a/util/dsa.c |
20 | +++ b/util/dsa.c | 37 | +++ b/util/dsa.c |
21 | @@ -XXX,XX +XXX,XX @@ | 38 | @@ -XXX,XX +XXX,XX @@ |
22 | 39 | #define DSA_WQ_PORTAL_SIZE 4096 | |
23 | #define DSA_WQ_SIZE 4096 | 40 | #define DSA_WQ_DEPTH 128 |
24 | #define MAX_DSA_DEVICES 16 | 41 | #define MAX_DSA_DEVICES 16 |
25 | +#define DSA_COMPLETION_THREAD "dsa_completion" | 42 | +#define DSA_COMPLETION_THREAD "qemu_dsa_completion" |
26 | 43 | + | |
27 | typedef QSIMPLEQ_HEAD(dsa_task_queue, buffer_zero_batch_task) dsa_task_queue; | 44 | +typedef struct { |
28 | |||
29 | @@ -XXX,XX +XXX,XX @@ struct dsa_device_group { | ||
30 | dsa_task_queue task_queue; | ||
31 | }; | ||
32 | |||
33 | +struct dsa_completion_thread { | ||
34 | + bool stopping; | 45 | + bool stopping; |
35 | + bool running; | 46 | + bool running; |
36 | + QemuThread thread; | 47 | + QemuThread thread; |
37 | + int thread_id; | 48 | + int thread_id; |
38 | + QemuSemaphore sem_init_done; | 49 | + QemuSemaphore sem_init_done; |
39 | + struct dsa_device_group *group; | 50 | + QemuDsaDeviceGroup *group; |
40 | +}; | 51 | +} QemuDsaCompletionThread; |
41 | + | 52 | |
42 | uint64_t max_retry_count; | 53 | uint32_t max_retry_count; |
43 | static struct dsa_device_group dsa_group; | 54 | static QemuDsaDeviceGroup dsa_group; |
44 | +static struct dsa_completion_thread completion_thread; | 55 | +static QemuDsaCompletionThread completion_thread; |
45 | 56 | ||
46 | 57 | ||
47 | /** | 58 | /** |
48 | @@ -XXX,XX +XXX,XX @@ submit_batch_wi_async(struct buffer_zero_batch_task *batch_task) | 59 | @@ -XXX,XX +XXX,XX @@ submit_batch_wi_async(QemuDsaBatchTask *batch_task) |
49 | return dsa_task_enqueue(device_group, batch_task); | 60 | return dsa_task_enqueue(device_group, batch_task); |
50 | } | 61 | } |
51 | 62 | ||
52 | +/** | 63 | +/** |
53 | + * @brief Poll for the DSA work item completion. | 64 | + * @brief Poll for the DSA work item completion. |
... | ... | ||
63 | +{ | 74 | +{ |
64 | + uint8_t status; | 75 | + uint8_t status; |
65 | + uint64_t retry = 0; | 76 | + uint64_t retry = 0; |
66 | + | 77 | + |
67 | + while (true) { | 78 | + while (true) { |
68 | + // The DSA operation completes successfully or fails. | 79 | + /* The DSA operation completes successfully or fails. */ |
69 | + status = completion->status; | 80 | + status = completion->status; |
70 | + if (status == DSA_COMP_SUCCESS || | 81 | + if (status == DSA_COMP_SUCCESS || |
71 | + status == DSA_COMP_PAGE_FAULT_NOBOF || | 82 | + status == DSA_COMP_PAGE_FAULT_NOBOF || |
72 | + status == DSA_COMP_BATCH_PAGE_FAULT || | 83 | + status == DSA_COMP_BATCH_PAGE_FAULT || |
73 | + status == DSA_COMP_BATCH_FAIL) { | 84 | + status == DSA_COMP_BATCH_FAIL) { |
74 | + break; | 85 | + break; |
75 | + } else if (status != DSA_COMP_NONE) { | 86 | + } else if (status != DSA_COMP_NONE) { |
76 | + /* TODO: Error handling here on unexpected failure. */ | 87 | + error_report("DSA opcode %d failed with status = %d.", |
77 | + fprintf(stderr, "DSA opcode %d failed with status = %d.\n", | ||
78 | + opcode, status); | 88 | + opcode, status); |
79 | + exit(1); | 89 | + return 1; |
80 | + } | 90 | + } |
81 | + retry++; | 91 | + retry++; |
82 | + if (retry > max_retry_count) { | 92 | + if (retry > max_retry_count) { |
83 | + fprintf(stderr, "Wait for completion retry %lu times.\n", retry); | 93 | + error_report("DSA wait for completion retry %lu times.", retry); |
84 | + exit(1); | 94 | + return 1; |
85 | + } | 95 | + } |
86 | + _mm_pause(); | 96 | + _mm_pause(); |
87 | + } | 97 | + } |
88 | + | 98 | + |
89 | + return 0; | 99 | + return 0; |
90 | +} | 100 | +} |
91 | + | 101 | + |
92 | +/** | 102 | +/** |
93 | + * @brief Complete a single DSA task in the batch task. | 103 | + * @brief Complete a single DSA task in the batch task. |
94 | + * | 104 | + * |
95 | + * @param task A pointer to the batch task structure. | 105 | + * @param task A pointer to the batch task structure. |
96 | + */ | 106 | + * |
97 | +static void | 107 | + * @return Zero if successful, otherwise non-zero. |
98 | +poll_task_completion(struct buffer_zero_batch_task *task) | 108 | + */ |
99 | +{ | 109 | +static int |
100 | + assert(task->task_type == DSA_TASK); | 110 | +poll_task_completion(QemuDsaBatchTask *task) |
111 | +{ | ||
112 | + assert(task->task_type == QEMU_DSA_TASK); | ||
101 | + | 113 | + |
102 | + struct dsa_completion_record *completion = &task->completions[0]; | 114 | + struct dsa_completion_record *completion = &task->completions[0]; |
103 | + uint8_t status; | 115 | + uint8_t status; |
104 | + | 116 | + int ret; |
105 | + poll_completion(completion, task->descriptors[0].opcode); | 117 | + |
118 | + ret = poll_completion(completion, task->descriptors[0].opcode); | ||
119 | + if (ret != 0) { | ||
120 | + goto exit; | ||
121 | + } | ||
106 | + | 122 | + |
107 | + status = completion->status; | 123 | + status = completion->status; |
108 | + if (status == DSA_COMP_SUCCESS) { | 124 | + if (status == DSA_COMP_SUCCESS) { |
109 | + task->results[0] = (completion->result == 0); | 125 | + task->results[0] = (completion->result == 0); |
110 | + return; | 126 | + goto exit; |
111 | + } | 127 | + } |
112 | + | 128 | + |
113 | + assert(status == DSA_COMP_PAGE_FAULT_NOBOF); | 129 | + assert(status == DSA_COMP_PAGE_FAULT_NOBOF); |
130 | + | ||
131 | +exit: | ||
132 | + return ret; | ||
114 | +} | 133 | +} |
115 | + | 134 | + |
116 | +/** | 135 | +/** |
117 | + * @brief Poll a batch task status until it completes. If DSA task doesn't | 136 | + * @brief Poll a batch task status until it completes. If DSA task doesn't |
118 | + * complete properly, use CPU to complete the task. | 137 | + * complete properly, use CPU to complete the task. |
119 | + * | 138 | + * |
120 | + * @param batch_task A pointer to the DSA batch task. | 139 | + * @param batch_task A pointer to the DSA batch task. |
121 | + */ | 140 | + * |
122 | +static void | 141 | + * @return Zero if successful, otherwise non-zero. |
123 | +poll_batch_task_completion(struct buffer_zero_batch_task *batch_task) | 142 | + */ |
124 | +{ | 143 | +static int |
125 | + struct dsa_completion_record *batch_completion = &batch_task->batch_completion; | 144 | +poll_batch_task_completion(QemuDsaBatchTask *batch_task) |
145 | +{ | ||
146 | + struct dsa_completion_record *batch_completion = | ||
147 | + &batch_task->batch_completion; | ||
126 | + struct dsa_completion_record *completion; | 148 | + struct dsa_completion_record *completion; |
127 | + uint8_t batch_status; | 149 | + uint8_t batch_status; |
128 | + uint8_t status; | 150 | + uint8_t status; |
129 | + bool *results = batch_task->results; | 151 | + bool *results = batch_task->results; |
130 | + uint32_t count = batch_task->batch_descriptor.desc_count; | 152 | + uint32_t count = batch_task->batch_descriptor.desc_count; |
131 | + | 153 | + int ret; |
132 | + poll_completion(batch_completion, | 154 | + |
133 | + batch_task->batch_descriptor.opcode); | 155 | + ret = poll_completion(batch_completion, |
156 | + batch_task->batch_descriptor.opcode); | ||
157 | + if (ret != 0) { | ||
158 | + goto exit; | ||
159 | + } | ||
134 | + | 160 | + |
135 | + batch_status = batch_completion->status; | 161 | + batch_status = batch_completion->status; |
136 | + | 162 | + |
137 | + if (batch_status == DSA_COMP_SUCCESS) { | 163 | + if (batch_status == DSA_COMP_SUCCESS) { |
138 | + if (batch_completion->bytes_completed == count) { | 164 | + if (batch_completion->bytes_completed == count) { |
139 | + // Let's skip checking for each descriptors' completion status | 165 | + /* |
140 | + // if the batch descriptor says all succedded. | 166 | + * Let's skip checking for each descriptors' completion status |
167 | + * if the batch descriptor says all succedded. | ||
168 | + */ | ||
141 | + for (int i = 0; i < count; i++) { | 169 | + for (int i = 0; i < count; i++) { |
142 | + assert(batch_task->completions[i].status == DSA_COMP_SUCCESS); | 170 | + assert(batch_task->completions[i].status == DSA_COMP_SUCCESS); |
143 | + results[i] = (batch_task->completions[i].result == 0); | 171 | + results[i] = (batch_task->completions[i].result == 0); |
144 | + } | 172 | + } |
145 | + return; | 173 | + goto exit; |
146 | + } | 174 | + } |
147 | + } else { | 175 | + } else { |
148 | + assert(batch_status == DSA_COMP_BATCH_FAIL || | 176 | + assert(batch_status == DSA_COMP_BATCH_FAIL || |
149 | + batch_status == DSA_COMP_BATCH_PAGE_FAULT); | 177 | + batch_status == DSA_COMP_BATCH_PAGE_FAULT); |
150 | + } | 178 | + } |
... | ... | ||
158 | + results[i] = (completion->result == 0); | 186 | + results[i] = (completion->result == 0); |
159 | + continue; | 187 | + continue; |
160 | + } | 188 | + } |
161 | + | 189 | + |
162 | + if (status != DSA_COMP_PAGE_FAULT_NOBOF) { | 190 | + if (status != DSA_COMP_PAGE_FAULT_NOBOF) { |
163 | + fprintf(stderr, | 191 | + error_report("Unexpected DSA completion status = %u.", status); |
164 | + "Unexpected completion status = %u.\n", status); | 192 | + ret = 1; |
165 | + assert(false); | 193 | + goto exit; |
166 | + } | 194 | + } |
167 | + } | 195 | + } |
196 | + | ||
197 | +exit: | ||
198 | + return ret; | ||
168 | +} | 199 | +} |
169 | + | 200 | + |
170 | +/** | 201 | +/** |
171 | + * @brief Handles an asynchronous DSA batch task completion. | 202 | + * @brief Handles an asynchronous DSA batch task completion. |
172 | + * | 203 | + * |
173 | + * @param task A pointer to the batch buffer zero task structure. | 204 | + * @param task A pointer to the batch buffer zero task structure. |
174 | + */ | 205 | + */ |
175 | +static void | 206 | +static void |
176 | +dsa_batch_task_complete(struct buffer_zero_batch_task *batch_task) | 207 | +dsa_batch_task_complete(QemuDsaBatchTask *batch_task) |
177 | +{ | 208 | +{ |
178 | + batch_task->status = DSA_TASK_COMPLETION; | 209 | + batch_task->status = QEMU_DSA_TASK_COMPLETION; |
179 | + batch_task->completion_callback(batch_task); | 210 | + batch_task->completion_callback(batch_task); |
180 | +} | 211 | +} |
181 | + | 212 | + |
182 | +/** | 213 | +/** |
183 | + * @brief The function entry point called by a dedicated DSA | 214 | + * @brief The function entry point called by a dedicated DSA |
... | ... | ||
188 | + * @return void* Not used. | 219 | + * @return void* Not used. |
189 | + */ | 220 | + */ |
190 | +static void * | 221 | +static void * |
191 | +dsa_completion_loop(void *opaque) | 222 | +dsa_completion_loop(void *opaque) |
192 | +{ | 223 | +{ |
193 | + struct dsa_completion_thread *thread_context = | 224 | + QemuDsaCompletionThread *thread_context = |
194 | + (struct dsa_completion_thread *)opaque; | 225 | + (QemuDsaCompletionThread *)opaque; |
195 | + struct buffer_zero_batch_task *batch_task; | 226 | + QemuDsaBatchTask *batch_task; |
196 | + struct dsa_device_group *group = thread_context->group; | 227 | + QemuDsaDeviceGroup *group = thread_context->group; |
228 | + int ret; | ||
197 | + | 229 | + |
198 | + rcu_register_thread(); | 230 | + rcu_register_thread(); |
199 | + | 231 | + |
200 | + thread_context->thread_id = qemu_get_thread_id(); | 232 | + thread_context->thread_id = qemu_get_thread_id(); |
201 | + qemu_sem_post(&thread_context->sem_init_done); | 233 | + qemu_sem_post(&thread_context->sem_init_done); |
... | ... | ||
205 | + assert(batch_task != NULL || !group->running); | 237 | + assert(batch_task != NULL || !group->running); |
206 | + if (!group->running) { | 238 | + if (!group->running) { |
207 | + assert(!thread_context->running); | 239 | + assert(!thread_context->running); |
208 | + break; | 240 | + break; |
209 | + } | 241 | + } |
210 | + if (batch_task->task_type == DSA_TASK) { | 242 | + if (batch_task->task_type == QEMU_DSA_TASK) { |
211 | + poll_task_completion(batch_task); | 243 | + ret = poll_task_completion(batch_task); |
212 | + } else { | 244 | + } else { |
213 | + assert(batch_task->task_type == DSA_BATCH_TASK); | 245 | + assert(batch_task->task_type == QEMU_DSA_BATCH_TASK); |
214 | + poll_batch_task_completion(batch_task); | 246 | + ret = poll_batch_task_completion(batch_task); |
247 | + } | ||
248 | + | ||
249 | + if (ret != 0) { | ||
250 | + goto exit; | ||
215 | + } | 251 | + } |
216 | + | 252 | + |
217 | + dsa_batch_task_complete(batch_task); | 253 | + dsa_batch_task_complete(batch_task); |
218 | + } | 254 | + } |
219 | + | 255 | + |
256 | +exit: | ||
257 | + if (ret != 0) { | ||
258 | + error_report("DSA completion thread exited due to internal error."); | ||
259 | + } | ||
220 | + rcu_unregister_thread(); | 260 | + rcu_unregister_thread(); |
221 | + return NULL; | 261 | + return NULL; |
222 | +} | 262 | +} |
223 | + | 263 | + |
224 | +/** | 264 | +/** |
... | ... | ||
227 | + * @param completion_thread A pointer to the completion thread context. | 267 | + * @param completion_thread A pointer to the completion thread context. |
228 | + * @param group A pointer to the DSA device group. | 268 | + * @param group A pointer to the DSA device group. |
229 | + */ | 269 | + */ |
230 | +static void | 270 | +static void |
231 | +dsa_completion_thread_init( | 271 | +dsa_completion_thread_init( |
232 | + struct dsa_completion_thread *completion_thread, | 272 | + QemuDsaCompletionThread *completion_thread, |
233 | + struct dsa_device_group *group) | 273 | + QemuDsaDeviceGroup *group) |
234 | +{ | 274 | +{ |
235 | + completion_thread->stopping = false; | 275 | + completion_thread->stopping = false; |
236 | + completion_thread->running = true; | 276 | + completion_thread->running = true; |
237 | + completion_thread->thread_id = -1; | 277 | + completion_thread->thread_id = -1; |
238 | + qemu_sem_init(&completion_thread->sem_init_done, 0); | 278 | + qemu_sem_init(&completion_thread->sem_init_done, 0); |
... | ... | ||
243 | + dsa_completion_loop, | 283 | + dsa_completion_loop, |
244 | + completion_thread, | 284 | + completion_thread, |
245 | + QEMU_THREAD_JOINABLE); | 285 | + QEMU_THREAD_JOINABLE); |
246 | + | 286 | + |
247 | + /* Wait for initialization to complete */ | 287 | + /* Wait for initialization to complete */ |
248 | + while (completion_thread->thread_id == -1) { | 288 | + qemu_sem_wait(&completion_thread->sem_init_done); |
249 | + qemu_sem_wait(&completion_thread->sem_init_done); | ||
250 | + } | ||
251 | +} | 289 | +} |
252 | + | 290 | + |
253 | +/** | 291 | +/** |
254 | + * @brief Stops the completion thread (and implicitly, the device group). | 292 | + * @brief Stops the completion thread (and implicitly, the device group). |
255 | + * | 293 | + * |
256 | + * @param opaque A pointer to the completion thread. | 294 | + * @param opaque A pointer to the completion thread. |
257 | + */ | 295 | + */ |
258 | +static void dsa_completion_thread_stop(void *opaque) | 296 | +static void dsa_completion_thread_stop(void *opaque) |
259 | +{ | 297 | +{ |
260 | + struct dsa_completion_thread *thread_context = | 298 | + QemuDsaCompletionThread *thread_context = |
261 | + (struct dsa_completion_thread *)opaque; | 299 | + (QemuDsaCompletionThread *)opaque; |
262 | + | 300 | + |
263 | + struct dsa_device_group *group = thread_context->group; | 301 | + QemuDsaDeviceGroup *group = thread_context->group; |
264 | + | 302 | + |
265 | + qemu_mutex_lock(&group->task_queue_lock); | 303 | + qemu_mutex_lock(&group->task_queue_lock); |
266 | + | 304 | + |
267 | + thread_context->stopping = true; | 305 | + thread_context->stopping = true; |
268 | + thread_context->running = false; | 306 | + thread_context->running = false; |
269 | + | 307 | + |
308 | + /* Prevent the compiler from setting group->running first. */ | ||
309 | + barrier(); | ||
270 | + dsa_device_group_stop(group); | 310 | + dsa_device_group_stop(group); |
271 | + | 311 | + |
272 | + qemu_cond_signal(&group->task_queue_cond); | 312 | + qemu_cond_signal(&group->task_queue_cond); |
273 | + qemu_mutex_unlock(&group->task_queue_lock); | 313 | + qemu_mutex_unlock(&group->task_queue_lock); |
274 | + | 314 | + |
... | ... | ||
278 | +} | 318 | +} |
279 | + | 319 | + |
280 | /** | 320 | /** |
281 | * @brief Check if DSA is running. | 321 | * @brief Check if DSA is running. |
282 | * | 322 | * |
283 | @@ -XXX,XX +XXX,XX @@ submit_batch_wi_async(struct buffer_zero_batch_task *batch_task) | 323 | @@ -XXX,XX +XXX,XX @@ submit_batch_wi_async(QemuDsaBatchTask *batch_task) |
284 | */ | 324 | */ |
285 | bool dsa_is_running(void) | 325 | bool qemu_dsa_is_running(void) |
286 | { | 326 | { |
287 | - return false; | 327 | - return false; |
288 | + return completion_thread.running; | 328 | + return completion_thread.running; |
289 | } | 329 | } |
290 | 330 | ||
291 | static void | 331 | static void |
292 | @@ -XXX,XX +XXX,XX @@ void dsa_start(void) | 332 | @@ -XXX,XX +XXX,XX @@ void qemu_dsa_start(void) |
293 | return; | 333 | return; |
294 | } | 334 | } |
295 | dsa_device_group_start(&dsa_group); | 335 | dsa_device_group_start(&dsa_group); |
296 | + dsa_completion_thread_init(&completion_thread, &dsa_group); | 336 | + dsa_completion_thread_init(&completion_thread, &dsa_group); |
297 | } | 337 | } |
298 | 338 | ||
299 | /** | 339 | /** |
300 | @@ -XXX,XX +XXX,XX @@ void dsa_stop(void) | 340 | @@ -XXX,XX +XXX,XX @@ void qemu_dsa_stop(void) |
301 | return; | 341 | return; |
302 | } | 342 | } |
303 | 343 | ||
304 | + dsa_completion_thread_stop(&completion_thread); | 344 | + dsa_completion_thread_stop(&completion_thread); |
305 | dsa_empty_task_queue(group); | 345 | dsa_empty_task_queue(group); |
306 | } | 346 | } |
307 | 347 | ||
308 | -- | 348 | -- |
309 | 2.30.2 | 349 | Yichen Wang |
1 | From: Hao Xiang <hao.xiang@linux.dev> | ||
---|---|---|---|
2 | |||
1 | Create DSA task with operation code DSA_OPCODE_COMPVAL. | 3 | Create DSA task with operation code DSA_OPCODE_COMPVAL. |
2 | Here we create two types of DSA tasks, a single DSA task and | 4 | Here we create two types of DSA tasks, a single DSA task and |
3 | a batch DSA task. A batch DSA task reduces task submission overhead | 5 | a batch DSA task. A batch DSA task reduces task submission overhead |
4 | and hence should be the default option. However, due to the way DSA | 6 | and hence should be the default option. However, due to the way DSA |
5 | hardware works, a DSA batch task must contain at least two individual | 7 | hardware works, a DSA batch task must contain at least two individual |
6 | tasks. There are times when we need to submit a single task and hence a | 8 | tasks. There are times when we need to submit a single task and hence a |
7 | single DSA task submission is also required. | 9 | single DSA task submission is also required. |
8 | 10 | ||
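For illustration, a caller inside util/dsa.c would pick between the two
submission paths roughly as in the sketch below (the wrapper name
submit_zero_check is hypothetical; the two helpers are the ones added by
this patch):

    /* Hypothetical wrapper; assumes it sits next to the helpers in util/dsa.c. */
    static int submit_zero_check(QemuDsaBatchTask *task,
                                 const void **bufs, size_t count, size_t len)
    {
        if (count == 1) {
            /* DSA does not accept a batch with fewer than two tasks. */
            return buffer_zero_dsa_async(task, bufs[0], len);
        }
        return buffer_zero_dsa_batch_async(task, bufs, count, len);
    }
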
9 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | 11 | Signed-off-by: Hao Xiang <hao.xiang@linux.dev> |
10 | Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com> | 12 | Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com> |
13 | Signed-off-by: Yichen Wang <yichen.wang@bytedance.com> | ||
14 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
11 | --- | 15 | --- |
12 | include/qemu/dsa.h | 16 +++ | 16 | include/qemu/dsa.h | 36 ++++++- |
13 | util/dsa.c | 252 +++++++++++++++++++++++++++++++++++++++++---- | 17 | util/dsa.c | 240 ++++++++++++++++++++++++++++++++++++++++++++- |
14 | 2 files changed, 247 insertions(+), 21 deletions(-) | 18 | 2 files changed, 270 insertions(+), 6 deletions(-) |
15 | 19 | ||
16 | diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h | 20 | diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h |
17 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/include/qemu/dsa.h | 22 | --- a/include/qemu/dsa.h |
19 | +++ b/include/qemu/dsa.h | 23 | +++ b/include/qemu/dsa.h |
20 | @@ -XXX,XX +XXX,XX @@ struct buffer_zero_batch_task { | 24 | @@ -XXX,XX +XXX,XX @@ |
21 | 25 | #define QEMU_DSA_H | |
22 | #endif | 26 | |
23 | 27 | #include "qapi/error.h" | |
24 | +/** | 28 | +#include "exec/cpu-common.h" |
25 | + * @brief Initializes a buffer zero batch task. | 29 | #include "qemu/thread.h" |
26 | + * | 30 | #include "qemu/queue.h" |
27 | + * @param task A pointer to the batch task to initialize. | 31 | |
28 | + * @param batch_size The number of DSA tasks in the batch. | 32 | @@ -XXX,XX +XXX,XX @@ typedef struct QemuDsaBatchTask { |
29 | + */ | 33 | QemuDsaTaskStatus status; |
30 | +void buffer_zero_batch_task_init(struct buffer_zero_batch_task *task, | 34 | int batch_size; |
31 | + int batch_size); | 35 | bool *results; |
32 | + | 36 | + /* Address of each pages in pages */ |
33 | +/** | 37 | + ram_addr_t *addr; |
34 | + * @brief Performs the proper cleanup on a DSA batch task. | 38 | QSIMPLEQ_ENTRY(QemuDsaBatchTask) entry; |
35 | + * | 39 | } QemuDsaBatchTask; |
36 | + * @param task A pointer to the batch task to cleanup. | 40 | |
37 | + */ | 41 | - |
38 | +void buffer_zero_batch_task_destroy(struct buffer_zero_batch_task *task); | ||
39 | + | ||
40 | /** | 42 | /** |
41 | * @brief Initializes DSA devices. | 43 | * @brief Initializes DSA devices. |
42 | * | 44 | * |
45 | @@ -XXX,XX +XXX,XX @@ void qemu_dsa_cleanup(void); | ||
46 | */ | ||
47 | bool qemu_dsa_is_running(void); | ||
48 | |||
49 | +/** | ||
50 | + * @brief Initializes a buffer zero DSA batch task. | ||
51 | + * | ||
52 | + * @param batch_size The number of zero page checking tasks in the batch. | ||
53 | + * @return A pointer to the zero page checking tasks initialized. | ||
54 | + */ | ||
55 | +QemuDsaBatchTask * | ||
56 | +buffer_zero_batch_task_init(int batch_size); | ||
57 | + | ||
58 | +/** | ||
59 | + * @brief Performs the proper cleanup on a DSA batch task. | ||
60 | + * | ||
61 | + * @param task A pointer to the batch task to cleanup. | ||
62 | + */ | ||
63 | +void buffer_zero_batch_task_destroy(QemuDsaBatchTask *task); | ||
64 | + | ||
65 | #else | ||
66 | |||
67 | +typedef struct QemuDsaBatchTask {} QemuDsaBatchTask; | ||
68 | + | ||
69 | static inline bool qemu_dsa_is_running(void) | ||
70 | { | ||
71 | return false; | ||
72 | @@ -XXX,XX +XXX,XX @@ static inline void qemu_dsa_stop(void) {} | ||
73 | |||
74 | static inline void qemu_dsa_cleanup(void) {} | ||
75 | |||
76 | +static inline QemuDsaBatchTask *buffer_zero_batch_task_init(int batch_size) | ||
77 | +{ | ||
78 | + return NULL; | ||
79 | +} | ||
80 | + | ||
81 | +static inline void buffer_zero_batch_task_destroy(QemuDsaBatchTask *task) {} | ||
82 | + | ||
83 | +static inline int | ||
84 | +buffer_is_zero_dsa_batch_sync(QemuDsaBatchTask *batch_task, | ||
85 | + const void **buf, size_t count, size_t len) | ||
86 | +{ | ||
87 | + return -1; | ||
88 | +} | ||
89 | + | ||
90 | #endif | ||
91 | |||
92 | #endif | ||
43 | diff --git a/util/dsa.c b/util/dsa.c | 93 | diff --git a/util/dsa.c b/util/dsa.c |
44 | index XXXXXXX..XXXXXXX 100644 | 94 | index XXXXXXX..XXXXXXX 100644 |
45 | --- a/util/dsa.c | 95 | --- a/util/dsa.c |
46 | +++ b/util/dsa.c | 96 | +++ b/util/dsa.c |
47 | @@ -XXX,XX +XXX,XX @@ uint64_t max_retry_count; | 97 | @@ -XXX,XX +XXX,XX @@ uint32_t max_retry_count; |
48 | static struct dsa_device_group dsa_group; | 98 | static QemuDsaDeviceGroup dsa_group; |
49 | static struct dsa_completion_thread completion_thread; | 99 | static QemuDsaCompletionThread completion_thread; |
50 | 100 | ||
51 | +static void buffer_zero_dsa_completion(void *context); | 101 | +static void buffer_zero_dsa_completion(void *context); |
52 | 102 | ||
53 | /** | 103 | /** |
54 | * @brief This function opens a DSA device's work queue and | 104 | * @brief This function opens a DSA device's work queue and |
55 | @@ -XXX,XX +XXX,XX @@ dsa_device_group_start(struct dsa_device_group *group) | 105 | @@ -XXX,XX +XXX,XX @@ dsa_device_group_start(QemuDsaDeviceGroup *group) |
56 | * | 106 | * |
57 | * @param group A pointer to the DSA device group. | 107 | * @param group A pointer to the DSA device group. |
58 | */ | 108 | */ |
59 | -__attribute__((unused)) | 109 | -__attribute__((unused)) |
60 | static void | 110 | static void |
61 | dsa_device_group_stop(struct dsa_device_group *group) | 111 | dsa_device_group_stop(QemuDsaDeviceGroup *group) |
62 | { | 112 | { |
63 | @@ -XXX,XX +XXX,XX @@ dsa_device_group_cleanup(struct dsa_device_group *group) | 113 | @@ -XXX,XX +XXX,XX @@ dsa_device_group_cleanup(QemuDsaDeviceGroup *group) |
64 | * @return struct dsa_device* A pointer to the next available DSA device | 114 | * @return struct QemuDsaDevice* A pointer to the next available DSA device |
65 | * in the group. | 115 | * in the group. |
66 | */ | 116 | */ |
67 | -__attribute__((unused)) | 117 | -__attribute__((unused)) |
68 | static struct dsa_device * | 118 | static QemuDsaDevice * |
69 | dsa_device_group_get_next_device(struct dsa_device_group *group) | 119 | dsa_device_group_get_next_device(QemuDsaDeviceGroup *group) |
70 | { | 120 | { |
71 | @@ -XXX,XX +XXX,XX @@ dsa_task_enqueue(struct dsa_device_group *group, | 121 | @@ -XXX,XX +XXX,XX @@ dsa_task_enqueue(QemuDsaDeviceGroup *group, |
72 | * @param group A pointer to the DSA device group. | 122 | * @param group A pointer to the DSA device group. |
73 | * @return buffer_zero_batch_task* The DSA task being dequeued. | 123 | * @return QemuDsaBatchTask* The DSA task being dequeued. |
74 | */ | 124 | */ |
75 | -__attribute__((unused)) | 125 | -__attribute__((unused)) |
76 | static struct buffer_zero_batch_task * | 126 | static QemuDsaBatchTask * |
77 | dsa_task_dequeue(struct dsa_device_group *group) | 127 | dsa_task_dequeue(QemuDsaDeviceGroup *group) |
78 | { | 128 | { |
79 | @@ -XXX,XX +XXX,XX @@ submit_wi_int(void *wq, struct dsa_hw_desc *descriptor) | 129 | @@ -XXX,XX +XXX,XX @@ submit_wi_int(void *wq, struct dsa_hw_desc *descriptor) |
80 | return 0; | ||
81 | } | ||
82 | |||
83 | -/** | ||
84 | - * @brief Synchronously submits a DSA work item to the | ||
85 | - * device work queue. | ||
86 | - * | ||
87 | - * @param wq A pointer to the DSA worjk queue's device memory. | ||
88 | - * @param descriptor A pointer to the DSA work item descriptor. | ||
89 | - * | ||
90 | - * @return int Zero if successful, non-zero otherwise. | ||
91 | - */ | ||
92 | -__attribute__((unused)) | ||
93 | -static int | ||
94 | -submit_wi(void *wq, struct dsa_hw_desc *descriptor) | ||
95 | -{ | ||
96 | - return submit_wi_int(wq, descriptor); | ||
97 | -} | ||
98 | - | ||
99 | /** | ||
100 | * @brief Asynchronously submits a DSA work item to the | ||
101 | * device work queue. | ||
102 | @@ -XXX,XX +XXX,XX @@ submit_wi(void *wq, struct dsa_hw_desc *descriptor) | ||
103 | * | 130 | * |
104 | * @return int Zero if successful, non-zero otherwise. | 131 | * @return int Zero if successful, non-zero otherwise. |
105 | */ | 132 | */ |
106 | -__attribute__((unused)) | 133 | -__attribute__((unused)) |
107 | static int | 134 | static int |
108 | submit_wi_async(struct buffer_zero_batch_task *task) | 135 | submit_wi_async(QemuDsaBatchTask *task) |
109 | { | 136 | { |
110 | @@ -XXX,XX +XXX,XX @@ submit_wi_async(struct buffer_zero_batch_task *task) | 137 | @@ -XXX,XX +XXX,XX @@ submit_wi_async(QemuDsaBatchTask *task) |
111 | * | 138 | * |
112 | * @return int Zero if successful, non-zero otherwise. | 139 | * @return int Zero if successful, non-zero otherwise. |
113 | */ | 140 | */ |
114 | -__attribute__((unused)) | 141 | -__attribute__((unused)) |
115 | static int | 142 | static int |
116 | submit_batch_wi_async(struct buffer_zero_batch_task *batch_task) | 143 | submit_batch_wi_async(QemuDsaBatchTask *batch_task) |
117 | { | 144 | { |
118 | @@ -XXX,XX +XXX,XX @@ static void dsa_completion_thread_stop(void *opaque) | 145 | @@ -XXX,XX +XXX,XX @@ void qemu_dsa_cleanup(void) |
119 | qemu_sem_destroy(&thread_context->sem_init_done); | 146 | dsa_device_group_cleanup(&dsa_group); |
120 | } | 147 | } |
121 | 148 | ||
149 | + | ||
150 | +/* Buffer zero comparison DSA task implementations */ | ||
151 | +/* =============================================== */ | ||
152 | + | ||
153 | +/** | ||
154 | + * @brief Sets a buffer zero comparison DSA task. | ||
155 | + * | ||
156 | + * @param descriptor A pointer to the DSA task descriptor. | ||
157 | + * @param buf A pointer to the memory buffer. | ||
158 | + * @param len The length of the buffer. | ||
159 | + */ | ||
160 | +static void | ||
161 | +buffer_zero_task_set_int(struct dsa_hw_desc *descriptor, | ||
162 | + const void *buf, | ||
163 | + size_t len) | ||
164 | +{ | ||
165 | + struct dsa_completion_record *completion = | ||
166 | + (struct dsa_completion_record *)descriptor->completion_addr; | ||
167 | + | ||
168 | + descriptor->xfer_size = len; | ||
169 | + descriptor->src_addr = (uintptr_t)buf; | ||
170 | + completion->status = 0; | ||
171 | + completion->result = 0; | ||
172 | +} | ||
173 | + | ||
174 | +/** | ||
175 | + * @brief Resets a buffer zero comparison DSA batch task. | ||
176 | + * | ||
177 | + * @param task A pointer to the DSA batch task. | ||
178 | + */ | ||
179 | +static void | ||
180 | +buffer_zero_task_reset(QemuDsaBatchTask *task) | ||
181 | +{ | ||
182 | + task->completions[0].status = DSA_COMP_NONE; | ||
183 | + task->task_type = QEMU_DSA_TASK; | ||
184 | + task->status = QEMU_DSA_TASK_READY; | ||
185 | +} | ||
186 | + | ||
187 | +/** | ||
188 | + * @brief Resets a buffer zero comparison DSA batch task. | ||
189 | + * | ||
190 | + * @param task A pointer to the batch task. | ||
191 | + * @param count The number of DSA tasks this batch task will contain. | ||
192 | + */ | ||
193 | +static void | ||
194 | +buffer_zero_batch_task_reset(QemuDsaBatchTask *task, size_t count) | ||
195 | +{ | ||
196 | + task->batch_completion.status = DSA_COMP_NONE; | ||
197 | + task->batch_descriptor.desc_count = count; | ||
198 | + task->task_type = QEMU_DSA_BATCH_TASK; | ||
199 | + task->status = QEMU_DSA_TASK_READY; | ||
200 | +} | ||
201 | + | ||
202 | +/** | ||
203 | + * @brief Sets a buffer zero comparison DSA task. | ||
204 | + * | ||
205 | + * @param task A pointer to the DSA task. | ||
206 | + * @param buf A pointer to the memory buffer. | ||
207 | + * @param len The buffer length. | ||
208 | + */ | ||
209 | +static void | ||
210 | +buffer_zero_task_set(QemuDsaBatchTask *task, | ||
211 | + const void *buf, | ||
212 | + size_t len) | ||
213 | +{ | ||
214 | + buffer_zero_task_reset(task); | ||
215 | + buffer_zero_task_set_int(&task->descriptors[0], buf, len); | ||
216 | +} | ||
217 | + | ||
218 | +/** | ||
219 | + * @brief Sets a buffer zero comparison batch task. | ||
220 | + * | ||
221 | + * @param batch_task A pointer to the batch task. | ||
222 | + * @param buf An array of memory buffers. | ||
223 | + * @param count The number of buffers in the array. | ||
224 | + * @param len The length of the buffers. | ||
225 | + */ | ||
226 | +static void | ||
227 | +buffer_zero_batch_task_set(QemuDsaBatchTask *batch_task, | ||
228 | + const void **buf, size_t count, size_t len) | ||
229 | +{ | ||
230 | + assert(count > 0); | ||
231 | + assert(count <= batch_task->batch_size); | ||
232 | + | ||
233 | + buffer_zero_batch_task_reset(batch_task, count); | ||
234 | + for (int i = 0; i < count; i++) { | ||
235 | + buffer_zero_task_set_int(&batch_task->descriptors[i], buf[i], len); | ||
236 | + } | ||
237 | +} | ||
238 | + | ||
239 | +/** | ||
240 | + * @brief Asynchronously perform a buffer zero DSA operation. | ||
241 | + * | ||
242 | + * @param task A pointer to the batch task structure. | ||
243 | + * @param buf A pointer to the memory buffer. | ||
244 | + * @param len The length of the memory buffer. | ||
245 | + * | ||
246 | + * @return int Zero if successful, otherwise an appropriate error code. | ||
247 | + */ | ||
248 | +__attribute__((unused)) | ||
249 | +static int | ||
250 | +buffer_zero_dsa_async(QemuDsaBatchTask *task, | ||
251 | + const void *buf, size_t len) | ||
252 | +{ | ||
253 | + buffer_zero_task_set(task, buf, len); | ||
254 | + | ||
255 | + return submit_wi_async(task); | ||
256 | +} | ||
257 | + | ||
258 | +/** | ||
259 | + * @brief Sends a memory comparison batch task to a DSA device and wait | ||
260 | + * for completion. | ||
261 | + * | ||
262 | + * @param batch_task The batch task to be submitted to DSA device. | ||
263 | + * @param buf An array of memory buffers to check for zero. | ||
264 | + * @param count The number of buffers. | ||
265 | + * @param len The buffer length. | ||
266 | + */ | ||
267 | +__attribute__((unused)) | ||
268 | +static int | ||
269 | +buffer_zero_dsa_batch_async(QemuDsaBatchTask *batch_task, | ||
270 | + const void **buf, size_t count, size_t len) | ||
271 | +{ | ||
272 | + assert(count <= batch_task->batch_size); | ||
273 | + buffer_zero_batch_task_set(batch_task, buf, count, len); | ||
274 | + | ||
275 | + return submit_batch_wi_async(batch_task); | ||
276 | +} | ||
277 | + | ||
278 | +/** | ||
279 | + * @brief The completion callback function for buffer zero | ||
280 | + * comparison DSA task completion. | ||
281 | + * | ||
282 | + * @param context A pointer to the callback context. | ||
283 | + */ | ||
284 | +static void | ||
285 | +buffer_zero_dsa_completion(void *context) | ||
286 | +{ | ||
287 | + assert(context != NULL); | ||
288 | + | ||
289 | + QemuDsaBatchTask *task = (QemuDsaBatchTask *)context; | ||
290 | + qemu_sem_post(&task->sem_task_complete); | ||
291 | +} | ||
292 | + | ||
293 | +/** | ||
294 | + * @brief Wait for the asynchronous DSA task to complete. | ||
295 | + * | ||
296 | + * @param batch_task A pointer to the buffer zero comparison batch task. | ||
297 | + */ | ||
298 | +__attribute__((unused)) | ||
299 | +static void | ||
300 | +buffer_zero_dsa_wait(QemuDsaBatchTask *batch_task) | ||
301 | +{ | ||
302 | + qemu_sem_wait(&batch_task->sem_task_complete); | ||
303 | +} | ||
304 | + | ||
122 | +/** | 305 | +/** |
123 | + * @brief Initializes a buffer zero comparison DSA task. | 306 | + * @brief Initializes a buffer zero comparison DSA task. |
124 | + * | 307 | + * |
125 | + * @param descriptor A pointer to the DSA task descriptor. | 308 | + * @param descriptor A pointer to the DSA task descriptor. |
126 | + * @param completion A pointer to the DSA task completion record. | 309 | + * @param completion A pointer to the DSA task completion record. |
... | ... | ||
134 | + descriptor->comp_pattern = (uint64_t)0; | 317 | + descriptor->comp_pattern = (uint64_t)0; |
135 | + descriptor->completion_addr = (uint64_t)completion; | 318 | + descriptor->completion_addr = (uint64_t)completion; |
136 | +} | 319 | +} |
137 | + | 320 | + |
138 | +/** | 321 | +/** |
139 | + * @brief Initializes a buffer zero batch task. | 322 | + * @brief Initializes a buffer zero DSA batch task. |
140 | + * | 323 | + * |
141 | + * @param task A pointer to the batch task to initialize. | 324 | + * @param batch_size The number of zero page checking tasks in the batch. |
142 | + * @param batch_size The number of DSA tasks in the batch. | 325 | + * @return A pointer to the zero page checking tasks initialized. |
143 | + */ | 326 | + */ |
144 | +void | 327 | +QemuDsaBatchTask * |
145 | +buffer_zero_batch_task_init(struct buffer_zero_batch_task *task, | 328 | +buffer_zero_batch_task_init(int batch_size) |
146 | + int batch_size) | 329 | +{ |
147 | +{ | 330 | + QemuDsaBatchTask *task = qemu_memalign(64, sizeof(QemuDsaBatchTask)); |
148 | + int descriptors_size = sizeof(*task->descriptors) * batch_size; | 331 | + int descriptors_size = sizeof(*task->descriptors) * batch_size; |
332 | + | ||
149 | + memset(task, 0, sizeof(*task)); | 333 | + memset(task, 0, sizeof(*task)); |
150 | + | 334 | + task->addr = g_new0(ram_addr_t, batch_size); |
335 | + task->results = g_new0(bool, batch_size); | ||
336 | + task->batch_size = batch_size; | ||
151 | + task->descriptors = | 337 | + task->descriptors = |
152 | + (struct dsa_hw_desc *)qemu_memalign(64, descriptors_size); | 338 | + (struct dsa_hw_desc *)qemu_memalign(64, descriptors_size); |
153 | + memset(task->descriptors, 0, descriptors_size); | 339 | + memset(task->descriptors, 0, descriptors_size); |
154 | + task->completions = (struct dsa_completion_record *)qemu_memalign( | 340 | + task->completions = (struct dsa_completion_record *)qemu_memalign( |
155 | + 32, sizeof(*task->completions) * batch_size); | 341 | + 32, sizeof(*task->completions) * batch_size); |
156 | + task->results = g_new0(bool, batch_size); | ||
157 | + task->batch_size = batch_size; | ||
158 | + | 342 | + |
159 | + task->batch_completion.status = DSA_COMP_NONE; | 343 | + task->batch_completion.status = DSA_COMP_NONE; |
160 | + task->batch_descriptor.completion_addr = (uint64_t)&task->batch_completion; | 344 | + task->batch_descriptor.completion_addr = (uint64_t)&task->batch_completion; |
161 | + // TODO: Ensure that we never send a batch with count <= 1 | 345 | + /* TODO: Ensure that we never send a batch with count <= 1 */ |
162 | + task->batch_descriptor.desc_count = 0; | 346 | + task->batch_descriptor.desc_count = 0; |
163 | + task->batch_descriptor.opcode = DSA_OPCODE_BATCH; | 347 | + task->batch_descriptor.opcode = DSA_OPCODE_BATCH; |
164 | + task->batch_descriptor.flags = IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CRAV; | 348 | + task->batch_descriptor.flags = IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CRAV; |
165 | + task->batch_descriptor.desc_list_addr = (uintptr_t)task->descriptors; | 349 | + task->batch_descriptor.desc_list_addr = (uintptr_t)task->descriptors; |
166 | + task->status = DSA_TASK_READY; | 350 | + task->status = QEMU_DSA_TASK_READY; |
167 | + task->group = &dsa_group; | 351 | + task->group = &dsa_group; |
168 | + task->device = dsa_device_group_get_next_device(&dsa_group); | 352 | + task->device = dsa_device_group_get_next_device(&dsa_group); |
169 | + | 353 | + |
170 | + for (int i = 0; i < task->batch_size; i++) { | 354 | + for (int i = 0; i < task->batch_size; i++) { |
171 | + buffer_zero_task_init_int(&task->descriptors[i], | 355 | + buffer_zero_task_init_int(&task->descriptors[i], |
172 | + &task->completions[i]); | 356 | + &task->completions[i]); |
173 | + } | 357 | + } |
174 | + | 358 | + |
175 | + qemu_sem_init(&task->sem_task_complete, 0); | 359 | + qemu_sem_init(&task->sem_task_complete, 0); |
176 | + task->completion_callback = buffer_zero_dsa_completion; | 360 | + task->completion_callback = buffer_zero_dsa_completion; |
361 | + | ||
362 | + return task; | ||
177 | +} | 363 | +} |
178 | + | 364 | + |
179 | +/** | 365 | +/** |
180 | + * @brief Performs the proper cleanup on a DSA batch task. | 366 | + * @brief Performs the proper cleanup on a DSA batch task. |
181 | + * | 367 | + * |
182 | + * @param task A pointer to the batch task to cleanup. | 368 | + * @param task A pointer to the batch task to cleanup. |
183 | + */ | 369 | + */ |
184 | +void | 370 | +void |
185 | +buffer_zero_batch_task_destroy(struct buffer_zero_batch_task *task) | 371 | +buffer_zero_batch_task_destroy(QemuDsaBatchTask *task) |
186 | +{ | 372 | +{ |
187 | + qemu_vfree(task->descriptors); | 373 | + if (task) { |
188 | + qemu_vfree(task->completions); | 374 | + g_free(task->addr); |
189 | + g_free(task->results); | 375 | + g_free(task->results); |
190 | + | 376 | + qemu_vfree(task->descriptors); |
191 | + qemu_sem_destroy(&task->sem_task_complete); | 377 | + qemu_vfree(task->completions); |
192 | +} | 378 | + task->results = NULL; |
193 | + | 379 | + qemu_sem_destroy(&task->sem_task_complete); |
194 | +/** | 380 | + qemu_vfree(task); |
195 | + * @brief Resets a buffer zero comparison DSA batch task. | ||
196 | + * | ||
197 | + * @param task A pointer to the batch task. | ||
198 | + * @param count The number of DSA tasks this batch task will contain. | ||
199 | + */ | ||
200 | +static void | ||
201 | +buffer_zero_batch_task_reset(struct buffer_zero_batch_task *task, size_t count) | ||
202 | +{ | ||
203 | + task->batch_completion.status = DSA_COMP_NONE; | ||
204 | + task->batch_descriptor.desc_count = count; | ||
205 | + task->task_type = DSA_BATCH_TASK; | ||
206 | + task->status = DSA_TASK_READY; | ||
207 | +} | ||
208 | + | ||
209 | +/** | ||
210 | + * @brief Sets a buffer zero comparison DSA task. | ||
211 | + * | ||
212 | + * @param descriptor A pointer to the DSA task descriptor. | ||
213 | + * @param buf A pointer to the memory buffer. | ||
214 | + * @param len The length of the buffer. | ||
215 | + */ | ||
216 | +static void | ||
217 | +buffer_zero_task_set_int(struct dsa_hw_desc *descriptor, | ||
218 | + const void *buf, | ||
219 | + size_t len) | ||
220 | +{ | ||
221 | + struct dsa_completion_record *completion = | ||
222 | + (struct dsa_completion_record *)descriptor->completion_addr; | ||
223 | + | ||
224 | + descriptor->xfer_size = len; | ||
225 | + descriptor->src_addr = (uintptr_t)buf; | ||
226 | + completion->status = 0; | ||
227 | + completion->result = 0; | ||
228 | +} | ||
229 | + | ||
230 | +/** | ||
231 | + * @brief Resets a buffer zero comparison DSA batch task. | ||
232 | + * | ||
233 | + * @param task A pointer to the DSA batch task. | ||
234 | + */ | ||
235 | +static void | ||
236 | +buffer_zero_task_reset(struct buffer_zero_batch_task *task) | ||
237 | +{ | ||
238 | + task->completions[0].status = DSA_COMP_NONE; | ||
239 | + task->task_type = DSA_TASK; | ||
240 | + task->status = DSA_TASK_READY; | ||
241 | +} | ||
242 | + | ||
243 | +/** | ||
244 | + * @brief Sets a buffer zero comparison DSA task. | ||
245 | + * | ||
246 | + * @param task A pointer to the DSA task. | ||
247 | + * @param buf A pointer to the memory buffer. | ||
248 | + * @param len The buffer length. | ||
249 | + */ | ||
250 | +static void | ||
251 | +buffer_zero_task_set(struct buffer_zero_batch_task *task, | ||
252 | + const void *buf, | ||
253 | + size_t len) | ||
254 | +{ | ||
255 | + buffer_zero_task_reset(task); | ||
256 | + buffer_zero_task_set_int(&task->descriptors[0], buf, len); | ||
257 | +} | ||
258 | + | ||
259 | +/** | ||
260 | + * @brief Sets a buffer zero comparison batch task. | ||
261 | + * | ||
262 | + * @param batch_task A pointer to the batch task. | ||
263 | + * @param buf An array of memory buffers. | ||
264 | + * @param count The number of buffers in the array. | ||
265 | + * @param len The length of the buffers. | ||
266 | + */ | ||
267 | +static void | ||
268 | +buffer_zero_batch_task_set(struct buffer_zero_batch_task *batch_task, | ||
269 | + const void **buf, size_t count, size_t len) | ||
270 | +{ | ||
271 | + assert(count > 0); | ||
272 | + assert(count <= batch_task->batch_size); | ||
273 | + | ||
274 | + buffer_zero_batch_task_reset(batch_task, count); | ||
275 | + for (int i = 0; i < count; i++) { | ||
276 | + buffer_zero_task_set_int(&batch_task->descriptors[i], buf[i], len); | ||
277 | + } | 381 | + } |
278 | +} | 382 | +} |
279 | + | ||
280 | +/** | ||
281 | + * @brief Asynchronously perform a buffer zero DSA operation. | ||
282 | + * | ||
283 | + * @param task A pointer to the batch task structure. | ||
284 | + * @param buf A pointer to the memory buffer. | ||
285 | + * @param len The length of the memory buffer. | ||
286 | + * | ||
287 | + * @return int Zero if successful, otherwise an appropriate error code. | ||
288 | + */ | ||
289 | +__attribute__((unused)) | ||
290 | +static int | ||
291 | +buffer_zero_dsa_async(struct buffer_zero_batch_task *task, | ||
292 | + const void *buf, size_t len) | ||
293 | +{ | ||
294 | + buffer_zero_task_set(task, buf, len); | ||
295 | + | ||
296 | + return submit_wi_async(task); | ||
297 | +} | ||
298 | + | ||
299 | +/** | ||
300 | + * @brief Sends a memory comparison batch task to a DSA device and wait | ||
301 | + * for completion. | ||
302 | + * | ||
303 | + * @param batch_task The batch task to be submitted to DSA device. | ||
304 | + * @param buf An array of memory buffers to check for zero. | ||
305 | + * @param count The number of buffers. | ||
306 | + * @param len The buffer length. | ||
307 | + */ | ||
308 | +__attribute__((unused)) | ||
309 | +static int | ||
310 | +buffer_zero_dsa_batch_async(struct buffer_zero_batch_task *batch_task, | ||
311 | + const void **buf, size_t count, size_t len) | ||
312 | +{ | ||
313 | + assert(count <= batch_task->batch_size); | ||
314 | + buffer_zero_batch_task_set(batch_task, buf, count, len); | ||
315 | + | ||
316 | + return submit_batch_wi_async(batch_task); | ||
317 | +} | ||
318 | + | ||
319 | +/** | ||
320 | + * @brief The completion callback function for buffer zero | ||
321 | + * comparison DSA task completion. | ||
322 | + * | ||
323 | + * @param context A pointer to the callback context. | ||
324 | + */ | ||
325 | +static void | ||
326 | +buffer_zero_dsa_completion(void *context) | ||
327 | +{ | ||
328 | + assert(context != NULL); | ||
329 | + | ||
330 | + struct buffer_zero_batch_task *task = | ||
331 | + (struct buffer_zero_batch_task *)context; | ||
332 | + qemu_sem_post(&task->sem_task_complete); | ||
333 | +} | ||
334 | + | ||
335 | +/** | ||
336 | + * @brief Wait for the asynchronous DSA task to complete. | ||
337 | + * | ||
338 | + * @param batch_task A pointer to the buffer zero comparison batch task. | ||
339 | + */ | ||
340 | +__attribute__((unused)) | ||
341 | +static void | ||
342 | +buffer_zero_dsa_wait(struct buffer_zero_batch_task *batch_task) | ||
343 | +{ | ||
344 | + qemu_sem_wait(&batch_task->sem_task_complete); | ||
345 | +} | ||
346 | + | ||
347 | /** | ||
348 | * @brief Check if DSA is running. | ||
349 | * | ||
350 | @@ -XXX,XX +XXX,XX @@ void dsa_cleanup(void) | ||
351 | |||
352 | #else | ||
353 | |||
354 | +void buffer_zero_batch_task_init(struct buffer_zero_batch_task *task, | ||
355 | + int batch_size) {} | ||
356 | + | ||
357 | +void buffer_zero_batch_task_destroy(struct buffer_zero_batch_task *task) {} | ||
358 | + | ||
359 | bool dsa_is_running(void) | ||
360 | { | ||
361 | return false; | ||
362 | -- | 383 | -- |
363 | 2.30.2 | 384 | Yichen Wang |
1 | From: Hao Xiang <hao.xiang@linux.dev> | ||
---|---|---|---|
2 | |||
1 | * Add a DSA task completion callback. | 3 | * Add a DSA task completion callback. |
2 | * DSA completion thread will call the task's completion callback | 4 | * DSA completion thread will call the task's completion callback |
3 | on every task/batch task completion. | 5 | on every task/batch task completion. |
4 | * Make the DSA submission path wait for completion. | 6 | * Make the DSA submission path wait for completion. |
5 | * Implement CPU fallback if DSA is not able to complete the task. | 7 | * Implement CPU fallback if DSA is not able to complete the task. |
6 | 8 | ||
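The CPU fallback in the last bullet amounts to the simplified sketch below
(the helper name zero_check_fallback is illustrative; the patch implements
this logic in task_cpu_fallback_int, and the sketch assumes "qemu/cutils.h"
for buffer_is_zero() and <linux/idxd.h> for struct dsa_completion_record):

    static void zero_check_fallback(struct dsa_completion_record *comp,
                                    const uint8_t *buf, size_t len,
                                    bool *result)
    {
        if (comp->status == DSA_COMP_SUCCESS) {
            return;              /* DSA finished the zero check itself. */
        }
        if (comp->bytes_completed != 0 && comp->result != 0) {
            *result = false;     /* Partial result already proves non-zero. */
            return;
        }
        /* Re-check only the bytes DSA did not get to. */
        *result = buffer_is_zero(buf + comp->bytes_completed,
                                 len - comp->bytes_completed);
    }
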
7 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | 9 | Signed-off-by: Hao Xiang <hao.xiang@linux.dev> |
8 | Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com> | 10 | Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com> |
11 | Signed-off-by: Yichen Wang <yichen.wang@bytedance.com> | ||
12 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
9 | --- | 13 | --- |
10 | include/qemu/dsa.h | 14 +++++ | 14 | include/qemu/dsa.h | 14 +++++ |
11 | util/dsa.c | 153 ++++++++++++++++++++++++++++++++++++++++++++- | 15 | util/dsa.c | 125 +++++++++++++++++++++++++++++++++++++++++++-- |
12 | 2 files changed, 164 insertions(+), 3 deletions(-) | 16 | 2 files changed, 135 insertions(+), 4 deletions(-) |
13 | 17 | ||
14 | diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h | 18 | diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h |
15 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/include/qemu/dsa.h | 20 | --- a/include/qemu/dsa.h |
17 | +++ b/include/qemu/dsa.h | 21 | +++ b/include/qemu/dsa.h |
18 | @@ -XXX,XX +XXX,XX @@ void buffer_zero_batch_task_init(struct buffer_zero_batch_task *task, | 22 | @@ -XXX,XX +XXX,XX @@ buffer_zero_batch_task_init(int batch_size); |
19 | */ | 23 | */ |
20 | void buffer_zero_batch_task_destroy(struct buffer_zero_batch_task *task); | 24 | void buffer_zero_batch_task_destroy(QemuDsaBatchTask *task); |
21 | 25 | ||
22 | +/** | 26 | +/** |
23 | + * @brief Performs buffer zero comparison on a DSA batch task asynchronously. | 27 | + * @brief Performs buffer zero comparison on a DSA batch task synchronously. |
24 | + * | 28 | + * |
25 | + * @param batch_task A pointer to the batch task. | 29 | + * @param batch_task A pointer to the batch task. |
26 | + * @param buf An array of memory buffers. | 30 | + * @param buf An array of memory buffers. |
27 | + * @param count The number of buffers in the array. | 31 | + * @param count The number of buffers in the array. |
28 | + * @param len The buffer length. | 32 | + * @param len The buffer length. |
29 | + * | 33 | + * |
30 | + * @return Zero if successful, otherwise non-zero. | 34 | + * @return Zero if successful, otherwise non-zero. |
31 | + */ | 35 | + */ |
32 | +int | 36 | +int |
33 | +buffer_is_zero_dsa_batch_async(struct buffer_zero_batch_task *batch_task, | 37 | +buffer_is_zero_dsa_batch_sync(QemuDsaBatchTask *batch_task, |
34 | + const void **buf, size_t count, size_t len); | 38 | + const void **buf, size_t count, size_t len); |
35 | + | 39 | + |
36 | /** | 40 | #else |
37 | * @brief Initializes DSA devices. | 41 | |
38 | * | 42 | typedef struct QemuDsaBatchTask {} QemuDsaBatchTask; |
39 | diff --git a/util/dsa.c b/util/dsa.c | 43 | diff --git a/util/dsa.c b/util/dsa.c |
40 | index XXXXXXX..XXXXXXX 100644 | 44 | index XXXXXXX..XXXXXXX 100644 |
41 | --- a/util/dsa.c | 45 | --- a/util/dsa.c |
42 | +++ b/util/dsa.c | 46 | +++ b/util/dsa.c |
43 | @@ -XXX,XX +XXX,XX @@ poll_completion(struct dsa_completion_record *completion, | 47 | @@ -XXX,XX +XXX,XX @@ poll_completion(struct dsa_completion_record *completion, |
44 | return 0; | 48 | return 0; |
45 | } | 49 | } |
46 | 50 | ||
47 | +/** | 51 | +/** |
48 | + * @brief Use CPU to complete a single zero page checking task. | 52 | + * @brief Helper function to use CPU to complete a single |
49 | + * | 53 | + * zero page checking task. |
50 | + * @param task A pointer to the task. | 54 | + * |
55 | + * @param completion A pointer to a DSA task completion record. | ||
56 | + * @param descriptor A pointer to a DSA task descriptor. | ||
57 | + * @param result A pointer to the result of a zero page checking. | ||
51 | + */ | 58 | + */ |
52 | +static void | 59 | +static void |
53 | +task_cpu_fallback(struct buffer_zero_batch_task *task) | 60 | +task_cpu_fallback_int(struct dsa_completion_record *completion, |
61 | + struct dsa_hw_desc *descriptor, bool *result) | ||
54 | +{ | 62 | +{ |
55 | + assert(task->task_type == DSA_TASK); | ||
56 | + | ||
57 | + struct dsa_completion_record *completion = &task->completions[0]; | ||
58 | + const uint8_t *buf; | 63 | + const uint8_t *buf; |
59 | + size_t len; | 64 | + size_t len; |
60 | + | 65 | + |
61 | + if (completion->status == DSA_COMP_SUCCESS) { | 66 | + if (completion->status == DSA_COMP_SUCCESS) { |
62 | + return; | 67 | + return; |
... | ... | ||
66 | + * DSA was able to partially complete the operation. Check the | 71 | + * DSA was able to partially complete the operation. Check the |
67 | + * result. If we already know this is not a zero page, we can | 72 | + * result. If we already know this is not a zero page, we can |
68 | + * return now. | 73 | + * return now. |
69 | + */ | 74 | + */ |
70 | + if (completion->bytes_completed != 0 && completion->result != 0) { | 75 | + if (completion->bytes_completed != 0 && completion->result != 0) { |
71 | + task->results[0] = false; | 76 | + *result = false; |
72 | + return; | 77 | + return; |
73 | + } | 78 | + } |
74 | + | 79 | + |
75 | + /* Let's fallback to use CPU to complete it. */ | 80 | + /* Let's fallback to use CPU to complete it. */ |
76 | + buf = (const uint8_t *)task->descriptors[0].src_addr; | 81 | + buf = (const uint8_t *)descriptor->src_addr; |
77 | + len = task->descriptors[0].xfer_size; | 82 | + len = descriptor->xfer_size; |
78 | + task->results[0] = buffer_is_zero(buf + completion->bytes_completed, | 83 | + *result = buffer_is_zero(buf + completion->bytes_completed, |
79 | + len - completion->bytes_completed); | 84 | + len - completion->bytes_completed); |
80 | +} | 85 | +} |
81 | + | 86 | + |
82 | /** | 87 | /** |
83 | * @brief Complete a single DSA task in the batch task. | 88 | * @brief Complete a single DSA task in the batch task. |
84 | * | 89 | * |
85 | @@ -XXX,XX +XXX,XX @@ poll_batch_task_completion(struct buffer_zero_batch_task *batch_task) | 90 | @@ -XXX,XX +XXX,XX @@ dsa_completion_loop(void *opaque) |
86 | } | 91 | (QemuDsaCompletionThread *)opaque; |
87 | } | 92 | QemuDsaBatchTask *batch_task; |
88 | 93 | QemuDsaDeviceGroup *group = thread_context->group; | |
89 | +/** | 94 | - int ret; |
90 | + * @brief Use CPU to complete the zero page checking batch task. | 95 | + int ret = 0; |
91 | + * | 96 | |
92 | + * @param batch_task A pointer to the batch task. | 97 | rcu_register_thread(); |
93 | + */ | 98 | |
94 | +static void | 99 | @@ -XXX,XX +XXX,XX @@ buffer_zero_batch_task_set(QemuDsaBatchTask *batch_task, |
95 | +batch_task_cpu_fallback(struct buffer_zero_batch_task *batch_task) | ||
96 | +{ | ||
97 | + assert(batch_task->task_type == DSA_BATCH_TASK); | ||
98 | + | ||
99 | + struct dsa_completion_record *batch_completion = | ||
100 | + &batch_task->batch_completion; | ||
101 | + struct dsa_completion_record *completion; | ||
102 | + uint8_t status; | ||
103 | + const uint8_t *buf; | ||
104 | + size_t len; | ||
105 | + bool *results = batch_task->results; | ||
106 | + uint32_t count = batch_task->batch_descriptor.desc_count; | ||
107 | + | ||
108 | + // DSA is able to complete the entire batch task. | ||
109 | + if (batch_completion->status == DSA_COMP_SUCCESS) { | ||
110 | + assert(count == batch_completion->bytes_completed); | ||
111 | + return; | ||
112 | + } | ||
113 | + | ||
114 | + /* | ||
115 | + * DSA encounters some error and is not able to complete | ||
116 | + * the entire batch task. Use CPU fallback. | ||
117 | + */ | ||
118 | + for (int i = 0; i < count; i++) { | ||
119 | + completion = &batch_task->completions[i]; | ||
120 | + status = completion->status; | ||
121 | + if (status == DSA_COMP_SUCCESS) { | ||
122 | + continue; | ||
123 | + } | ||
124 | + assert(status == DSA_COMP_PAGE_FAULT_NOBOF); | ||
125 | + | ||
126 | + /* | ||
127 | + * DSA was able to partially complete the operation. Check the | ||
128 | + * result. If we already know this is not a zero page, we can | ||
129 | + * return now. | ||
130 | + */ | ||
131 | + if (completion->bytes_completed != 0 && completion->result != 0) { | ||
132 | + results[i] = false; | ||
133 | + continue; | ||
134 | + } | ||
135 | + | ||
136 | + /* Let's fallback to use CPU to complete it. */ | ||
137 | + buf = (uint8_t *)batch_task->descriptors[i].src_addr; | ||
138 | + len = batch_task->descriptors[i].xfer_size; | ||
139 | + results[i] = | ||
140 | + buffer_is_zero(buf + completion->bytes_completed, | ||
141 | + len - completion->bytes_completed); | ||
142 | + } | ||
143 | +} | ||
144 | + | ||
145 | /** | ||
146 | * @brief Handles an asynchronous DSA batch task completion. | ||
147 | * | ||
148 | @@ -XXX,XX +XXX,XX @@ buffer_zero_batch_task_set(struct buffer_zero_batch_task *batch_task, | ||
149 | * | 100 | * |
150 | * @return int Zero if successful, otherwise an appropriate error code. | 101 | * @return int Zero if successful, otherwise an appropriate error code. |
151 | */ | 102 | */ |
152 | -__attribute__((unused)) | 103 | -__attribute__((unused)) |
153 | static int | 104 | static int |
154 | buffer_zero_dsa_async(struct buffer_zero_batch_task *task, | 105 | buffer_zero_dsa_async(QemuDsaBatchTask *task, |
155 | const void *buf, size_t len) | 106 | const void *buf, size_t len) |
156 | @@ -XXX,XX +XXX,XX @@ buffer_zero_dsa_async(struct buffer_zero_batch_task *task, | 107 | @@ -XXX,XX +XXX,XX @@ buffer_zero_dsa_async(QemuDsaBatchTask *task, |
157 | * @param count The number of buffers. | 108 | * @param count The number of buffers. |
158 | * @param len The buffer length. | 109 | * @param len The buffer length. |
159 | */ | 110 | */ |
160 | -__attribute__((unused)) | 111 | -__attribute__((unused)) |
161 | static int | 112 | static int |
162 | buffer_zero_dsa_batch_async(struct buffer_zero_batch_task *batch_task, | 113 | buffer_zero_dsa_batch_async(QemuDsaBatchTask *batch_task, |
163 | const void **buf, size_t count, size_t len) | 114 | const void **buf, size_t count, size_t len) |
164 | @@ -XXX,XX +XXX,XX @@ buffer_zero_dsa_completion(void *context) | 115 | @@ -XXX,XX +XXX,XX @@ buffer_zero_dsa_completion(void *context) |
165 | * | 116 | * |
166 | * @param batch_task A pointer to the buffer zero comparison batch task. | 117 | * @param batch_task A pointer to the buffer zero comparison batch task. |
167 | */ | 118 | */ |
168 | -__attribute__((unused)) | 119 | -__attribute__((unused)) |
169 | static void | 120 | static void |
170 | buffer_zero_dsa_wait(struct buffer_zero_batch_task *batch_task) | 121 | buffer_zero_dsa_wait(QemuDsaBatchTask *batch_task) |
171 | { | 122 | { |
172 | qemu_sem_wait(&batch_task->sem_task_complete); | 123 | qemu_sem_wait(&batch_task->sem_task_complete); |
173 | } | 124 | } |
174 | 125 | ||
175 | +/** | 126 | +/** |
176 | + * @brief Use CPU to complete the zero page checking task if DSA | 127 | + * @brief Use CPU to complete the zero page checking task if DSA |
177 | + * is not able to complete it. | 128 | + * is not able to complete it. |
178 | + * | 129 | + * |
179 | + * @param batch_task A pointer to the batch task. | 130 | + * @param batch_task A pointer to the batch task. |
180 | + */ | 131 | + */ |
181 | +static void | 132 | +static void |
182 | +buffer_zero_cpu_fallback(struct buffer_zero_batch_task *batch_task) | 133 | +buffer_zero_cpu_fallback(QemuDsaBatchTask *batch_task) |
183 | +{ | 134 | +{ |
184 | + if (batch_task->task_type == DSA_TASK) { | 135 | + if (batch_task->task_type == QEMU_DSA_TASK) { |
185 | + task_cpu_fallback(batch_task); | 136 | + if (batch_task->completions[0].status == DSA_COMP_SUCCESS) { |
186 | + } else { | 137 | + return; |
187 | + assert(batch_task->task_type == DSA_BATCH_TASK); | 138 | + } |
188 | + batch_task_cpu_fallback(batch_task); | 139 | + task_cpu_fallback_int(&batch_task->completions[0], |
140 | + &batch_task->descriptors[0], | ||
141 | + &batch_task->results[0]); | ||
142 | + } else if (batch_task->task_type == QEMU_DSA_BATCH_TASK) { | ||
143 | + struct dsa_completion_record *batch_completion = | ||
144 | + &batch_task->batch_completion; | ||
145 | + struct dsa_completion_record *completion; | ||
146 | + uint8_t status; | ||
147 | + bool *results = batch_task->results; | ||
148 | + uint32_t count = batch_task->batch_descriptor.desc_count; | ||
149 | + | ||
150 | + /* DSA is able to complete the entire batch task. */ | ||
151 | + if (batch_completion->status == DSA_COMP_SUCCESS) { | ||
152 | + assert(count == batch_completion->bytes_completed); | ||
153 | + return; | ||
154 | + } | ||
155 | + | ||
156 | + /* | ||
157 | + * DSA encounters some error and is not able to complete | ||
158 | + * the entire batch task. Use CPU fallback. | ||
159 | + */ | ||
160 | + for (int i = 0; i < count; i++) { | ||
161 | + | ||
162 | + completion = &batch_task->completions[i]; | ||
163 | + status = completion->status; | ||
164 | + | ||
165 | + assert(status == DSA_COMP_SUCCESS || | ||
166 | + status == DSA_COMP_PAGE_FAULT_NOBOF); | ||
167 | + | ||
168 | + task_cpu_fallback_int(completion, | ||
169 | + &batch_task->descriptors[i], | ||
170 | + &results[i]); | ||
171 | + } | ||
189 | + } | 172 | + } |
190 | +} | 173 | +} |
191 | + | 174 | + |
192 | /** | 175 | /** |
193 | * @brief Check if DSA is running. | 176 | * @brief Initializes a buffer zero comparison DSA task. |
194 | * | 177 | * |
195 | @@ -XXX,XX +XXX,XX @@ void dsa_cleanup(void) | 178 | @@ -XXX,XX +XXX,XX @@ buffer_zero_batch_task_destroy(QemuDsaBatchTask *task) |
196 | dsa_device_group_cleanup(&dsa_group); | 179 | qemu_vfree(task); |
180 | } | ||
197 | } | 181 | } |
198 | 182 | + | |
199 | +/** | 183 | +/** |
200 | + * @brief Performs buffer zero comparison on a DSA batch task asynchronously. | 184 | + * @brief Performs buffer zero comparison on a DSA batch task synchronously. |
201 | + * | 185 | + * |
202 | + * @param batch_task A pointer to the batch task. | 186 | + * @param batch_task A pointer to the batch task. |
203 | + * @param buf An array of memory buffers. | 187 | + * @param buf An array of memory buffers. |
204 | + * @param count The number of buffers in the array. | 188 | + * @param count The number of buffers in the array. |
205 | + * @param len The buffer length. | 189 | + * @param len The buffer length. |
206 | + * | 190 | + * |
207 | + * @return Zero if successful, otherwise non-zero. | 191 | + * @return Zero if successful, otherwise non-zero. |
208 | + */ | 192 | + */ |
209 | +int | 193 | +int |
210 | +buffer_is_zero_dsa_batch_async(struct buffer_zero_batch_task *batch_task, | 194 | +buffer_is_zero_dsa_batch_sync(QemuDsaBatchTask *batch_task, |
211 | + const void **buf, size_t count, size_t len) | 195 | + const void **buf, size_t count, size_t len) |
212 | +{ | 196 | +{ |
213 | + if (count <= 0 || count > batch_task->batch_size) { | ||
214 | + return -1; | ||
215 | + } | ||
216 | + | ||
217 | + assert(batch_task != NULL); | 197 | + assert(batch_task != NULL); |
218 | + assert(len != 0); | 198 | + assert(len != 0); |
219 | + assert(buf != NULL); | 199 | + assert(buf != NULL); |
220 | + | 200 | + |
201 | + if (count <= 0 || count > batch_task->batch_size) { | ||
202 | + return -1; | ||
203 | + } | ||
204 | + | ||
221 | + if (count == 1) { | 205 | + if (count == 1) { |
222 | + // DSA doesn't take batch operation with only 1 task. | 206 | + /* DSA doesn't take batch operation with only 1 task. */ |
223 | + buffer_zero_dsa_async(batch_task, buf[0], len); | 207 | + buffer_zero_dsa_async(batch_task, buf[0], len); |
224 | + } else { | 208 | + } else { |
225 | + buffer_zero_dsa_batch_async(batch_task, buf, count, len); | 209 | + buffer_zero_dsa_batch_async(batch_task, buf, count, len); |
226 | + } | 210 | + } |
227 | + | 211 | + |
228 | + buffer_zero_dsa_wait(batch_task); | 212 | + buffer_zero_dsa_wait(batch_task); |
229 | + buffer_zero_cpu_fallback(batch_task); | 213 | + buffer_zero_cpu_fallback(batch_task); |
230 | + | 214 | + |
231 | + return 0; | 215 | + return 0; |
232 | +} | 216 | +} |
233 | + | ||
234 | #else | ||
235 | |||
236 | void buffer_zero_batch_task_init(struct buffer_zero_batch_task *task, | ||
237 | @@ -XXX,XX +XXX,XX @@ void dsa_stop(void) {} | ||
238 | |||
239 | void dsa_cleanup(void) {} | ||
240 | |||
241 | +int | ||
242 | +buffer_is_zero_dsa_batch_async(struct buffer_zero_batch_task *batch_task, | ||
243 | + const void **buf, size_t count, size_t len) | ||
244 | +{ | ||
245 | + exit(1); | ||
246 | +} | ||
247 | + | ||
248 | #endif | ||
249 | |||
250 | -- | 217 | -- |
251 | 2.30.2 | 218 | Yichen Wang |
1 | From: Hao Xiang <hao.xiang@linux.dev> | ||
---|---|---|---|
2 | |||
1 | Intel DSA offloading is an optional feature that turns on if | 3 | Intel DSA offloading is an optional feature that turns on if |
2 | a proper hardware and software stack is available. To turn on | 4 | a proper hardware and software stack is available. Turn on |
3 | DSA offloading in multifd live migration: | 5 | DSA offloading in multifd live migration by setting: |
4 | 6 | ||
5 | multifd-dsa-accel="[dsa_dev_path1] ] [dsa_dev_path2] ... [dsa_dev_pathX]" | 7 | zero-page-detection=dsa-accel |
8 | accel-path="dsa:<dsa_dev_path1> dsa:[dsa_dev_path2] ..." | ||
6 | 9 | ||
7 | This feature is turned off by default. | 10 | This feature is turned off by default. |
8 | 11 | ||
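For example, with QMP the feature could be enabled roughly as follows
(illustrative only: the work queue path /dev/dsa/wq0.0 is a made-up
example, and accel-path is given as a list of strings in QMP):

    { "execute": "migrate-set-parameters",
      "arguments": { "zero-page-detection": "dsa-accel",
                     "accel-path": [ "dsa:/dev/dsa/wq0.0" ] } }
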
9 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | 12 | Signed-off-by: Hao Xiang <hao.xiang@linux.dev> |
13 | Signed-off-by: Yichen Wang <yichen.wang@bytedance.com> | ||
14 | Acked-by: Dr. David Alan Gilbert <dave@treblig.org> | ||
10 | --- | 15 | --- |
11 | migration/migration-hmp-cmds.c | 8 ++++++++ | 16 | hmp-commands.hx | 2 +- |
12 | migration/options.c | 28 ++++++++++++++++++++++++++++ | 17 | include/qemu/dsa.h | 13 +++++++++++++ |
18 | migration/migration-hmp-cmds.c | 20 +++++++++++++++++++- | ||
19 | migration/options.c | 30 ++++++++++++++++++++++++++++++ | ||
13 | migration/options.h | 1 + | 20 | migration/options.h | 1 + |
14 | qapi/migration.json | 17 ++++++++++++++--- | 21 | qapi/migration.json | 32 ++++++++++++++++++++++++++++---- |
15 | scripts/meson-buildoptions.sh | 6 +++--- | 22 | util/dsa.c | 31 +++++++++++++++++++++++++++++++ |
16 | 5 files changed, 54 insertions(+), 6 deletions(-) | 23 | 7 files changed, 123 insertions(+), 6 deletions(-) |
17 | 24 | ||
25 | diff --git a/hmp-commands.hx b/hmp-commands.hx | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/hmp-commands.hx | ||
28 | +++ b/hmp-commands.hx | ||
29 | @@ -XXX,XX +XXX,XX @@ ERST | ||
30 | |||
31 | { | ||
32 | .name = "migrate_set_parameter", | ||
33 | - .args_type = "parameter:s,value:s", | ||
34 | + .args_type = "parameter:s,value:S", | ||
35 | .params = "parameter value", | ||
36 | .help = "Set the parameter for migration", | ||
37 | .cmd = hmp_migrate_set_parameter, | ||
38 | diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/include/qemu/dsa.h | ||
41 | +++ b/include/qemu/dsa.h | ||
42 | @@ -XXX,XX +XXX,XX @@ void qemu_dsa_stop(void); | ||
43 | */ | ||
44 | void qemu_dsa_cleanup(void); | ||
45 | |||
46 | +/** | ||
47 | + * @brief Check if DSA is supported. | ||
48 | + * | ||
49 | + * @return True if DSA is supported, otherwise false. | ||
50 | + */ | ||
51 | +bool qemu_dsa_is_supported(void); | ||
52 | + | ||
53 | /** | ||
54 | * @brief Check if DSA is running. | ||
55 | * | ||
56 | @@ -XXX,XX +XXX,XX @@ buffer_is_zero_dsa_batch_sync(QemuDsaBatchTask *batch_task, | ||
57 | |||
58 | typedef struct QemuDsaBatchTask {} QemuDsaBatchTask; | ||
59 | |||
60 | +static inline bool qemu_dsa_is_supported(void) | ||
61 | +{ | ||
62 | + return false; | ||
63 | +} | ||
64 | + | ||
65 | + | ||
66 | static inline bool qemu_dsa_is_running(void) | ||
67 | { | ||
68 | return false; | ||
18 | diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c | 69 | diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c |
19 | index XXXXXXX..XXXXXXX 100644 | 70 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/migration/migration-hmp-cmds.c | 71 | --- a/migration/migration-hmp-cmds.c |
21 | +++ b/migration/migration-hmp-cmds.c | 72 | +++ b/migration/migration-hmp-cmds.c |
22 | @@ -XXX,XX +XXX,XX @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) | 73 | @@ -XXX,XX +XXX,XX @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) |
23 | monitor_printf(mon, "%s: '%s'\n", | 74 | monitor_printf(mon, "%s: '%s'\n", |
24 | MigrationParameter_str(MIGRATION_PARAMETER_TLS_AUTHZ), | 75 | MigrationParameter_str(MIGRATION_PARAMETER_TLS_AUTHZ), |
25 | params->tls_authz); | 76 | params->tls_authz); |
26 | + monitor_printf(mon, "%s: %s\n", | 77 | - |
27 | + MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_DSA_ACCEL), | 78 | + if (params->has_accel_path) { |
28 | + params->multifd_dsa_accel); | 79 | + strList *accel_path = params->accel_path; |
29 | 80 | + monitor_printf(mon, "%s:", | |
81 | + MigrationParameter_str(MIGRATION_PARAMETER_ACCEL_PATH)); | ||
82 | + while (accel_path) { | ||
83 | + monitor_printf(mon, " '%s'", accel_path->value); | ||
84 | + accel_path = accel_path->next; | ||
85 | + } | ||
86 | + monitor_printf(mon, "\n"); | ||
87 | + } | ||
30 | if (params->has_block_bitmap_mapping) { | 88 | if (params->has_block_bitmap_mapping) { |
31 | const BitmapMigrationNodeAliasList *bmnal; | 89 | const BitmapMigrationNodeAliasList *bmnal; |
90 | |||
32 | @@ -XXX,XX +XXX,XX @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) | 91 | @@ -XXX,XX +XXX,XX @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) |
33 | p->has_block_incremental = true; | 92 | p->has_x_checkpoint_delay = true; |
34 | visit_type_bool(v, param, &p->block_incremental, &err); | 93 | visit_type_uint32(v, param, &p->x_checkpoint_delay, &err); |
35 | break; | 94 | break; |
36 | + case MIGRATION_PARAMETER_MULTIFD_DSA_ACCEL: | 95 | + case MIGRATION_PARAMETER_ACCEL_PATH: |
37 | + p->multifd_dsa_accel = g_new0(StrOrNull, 1); | 96 | + p->has_accel_path = true; |
38 | + p->multifd_dsa_accel->type = QTYPE_QSTRING; | 97 | + char **strv = g_strsplit(valuestr ? : "", " ", -1); |
39 | + visit_type_str(v, param, &p->multifd_dsa_accel->u.s, &err); | 98 | + strList **tail = &p->accel_path; |
99 | + for (int i = 0; strv[i]; i++) { | ||
100 | + QAPI_LIST_APPEND(tail, g_strdup(strv[i])); | ||
101 | + } | ||
102 | + g_strfreev(strv); | ||
40 | + break; | 103 | + break; |
41 | case MIGRATION_PARAMETER_MULTIFD_CHANNELS: | 104 | case MIGRATION_PARAMETER_MULTIFD_CHANNELS: |
42 | p->has_multifd_channels = true; | 105 | p->has_multifd_channels = true; |
43 | visit_type_uint8(v, param, &p->multifd_channels, &err); | 106 | visit_type_uint8(v, param, &p->multifd_channels, &err); |
44 | diff --git a/migration/options.c b/migration/options.c | 107 | diff --git a/migration/options.c b/migration/options.c |
45 | index XXXXXXX..XXXXXXX 100644 | 108 | index XXXXXXX..XXXXXXX 100644 |
46 | --- a/migration/options.c | 109 | --- a/migration/options.c |
47 | +++ b/migration/options.c | 110 | +++ b/migration/options.c |
48 | @@ -XXX,XX +XXX,XX @@ Property migration_properties[] = { | 111 | @@ -XXX,XX +XXX,XX @@ |
49 | DEFINE_PROP_MIG_MODE("mode", MigrationState, | 112 | |
50 | parameters.mode, | 113 | #include "qemu/osdep.h" |
51 | MIG_MODE_NORMAL), | 114 | #include "qemu/error-report.h" |
52 | + DEFINE_PROP_STRING("multifd-dsa-accel", MigrationState, | 115 | +#include "qemu/dsa.h" |
53 | + parameters.multifd_dsa_accel), | 116 | #include "exec/target_page.h" |
54 | 117 | #include "qapi/clone-visitor.h" | |
55 | /* Migration capabilities */ | 118 | #include "qapi/error.h" |
56 | DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), | ||
57 | @@ -XXX,XX +XXX,XX @@ const char *migrate_tls_creds(void) | 119 | @@ -XXX,XX +XXX,XX @@ const char *migrate_tls_creds(void) |
58 | return s->parameters.tls_creds; | 120 | return s->parameters.tls_creds; |
59 | } | 121 | } |
60 | 122 | ||
61 | +const char *migrate_multifd_dsa_accel(void) | 123 | +const strList *migrate_accel_path(void) |
62 | +{ | 124 | +{ |
63 | + MigrationState *s = migrate_get_current(); | 125 | + MigrationState *s = migrate_get_current(); |
64 | + | 126 | + |
65 | + return s->parameters.multifd_dsa_accel; | 127 | + return s->parameters.accel_path; |
66 | +} | 128 | +} |
67 | + | 129 | + |
68 | const char *migrate_tls_hostname(void) | 130 | const char *migrate_tls_hostname(void) |
69 | { | 131 | { |
70 | MigrationState *s = migrate_get_current(); | 132 | MigrationState *s = migrate_get_current(); |
71 | @@ -XXX,XX +XXX,XX @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) | 133 | @@ -XXX,XX +XXX,XX @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) |
72 | params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit; | 134 | params->zero_page_detection = s->parameters.zero_page_detection; |
73 | params->has_mode = true; | 135 | params->has_direct_io = true; |
74 | params->mode = s->parameters.mode; | 136 | params->direct_io = s->parameters.direct_io; |
75 | + params->multifd_dsa_accel = s->parameters.multifd_dsa_accel; | 137 | + params->has_accel_path = true; |
138 | + params->accel_path = QAPI_CLONE(strList, s->parameters.accel_path); | ||
76 | 139 | ||
77 | return params; | 140 | return params; |
78 | } | 141 | } |
79 | @@ -XXX,XX +XXX,XX @@ void migrate_params_init(MigrationParameters *params) | 142 | @@ -XXX,XX +XXX,XX @@ void migrate_params_init(MigrationParameters *params) |
80 | { | 143 | { |
81 | params->tls_hostname = g_strdup(""); | 144 | params->tls_hostname = g_strdup(""); |
82 | params->tls_creds = g_strdup(""); | 145 | params->tls_creds = g_strdup(""); |
83 | + params->multifd_dsa_accel = g_strdup(""); | 146 | + params->accel_path = NULL; |
84 | 147 | ||
85 | /* Set has_* up only for parameter checks */ | 148 | /* Set has_* up only for parameter checks */ |
86 | params->has_compress_level = true; | 149 | params->has_throttle_trigger_threshold = true; |
150 | @@ -XXX,XX +XXX,XX @@ bool migrate_params_check(MigrationParameters *params, Error **errp) | ||
151 | return false; | ||
152 | } | ||
153 | |||
154 | + if (params->has_zero_page_detection && | ||
155 | + params->zero_page_detection == ZERO_PAGE_DETECTION_DSA_ACCEL) { | ||
156 | + if (!qemu_dsa_is_supported()) { | ||
157 | +            error_setg(errp, "DSA acceleration is not supported");
158 | + return false; | ||
159 | + } | ||
160 | + } | ||
161 | + | ||
162 | return true; | ||
163 | } | ||
164 | |||
87 | @@ -XXX,XX +XXX,XX @@ static void migrate_params_test_apply(MigrateSetParameters *params, | 165 | @@ -XXX,XX +XXX,XX @@ static void migrate_params_test_apply(MigrateSetParameters *params, |
88 | if (params->has_mode) { | 166 | if (params->has_direct_io) { |
89 | dest->mode = params->mode; | 167 | dest->direct_io = params->direct_io; |
90 | } | 168 | } |
91 | + | 169 | + |
92 | + if (params->multifd_dsa_accel) { | 170 | + if (params->has_accel_path) { |
93 | + assert(params->multifd_dsa_accel->type == QTYPE_QSTRING); | 171 | + dest->has_accel_path = true; |
94 | + dest->multifd_dsa_accel = params->multifd_dsa_accel->u.s; | 172 | + dest->accel_path = params->accel_path; |
95 | + } | 173 | + } |
96 | } | 174 | } |
97 | 175 | ||
98 | static void migrate_params_apply(MigrateSetParameters *params, Error **errp) | 176 | static void migrate_params_apply(MigrateSetParameters *params, Error **errp) |
99 | @@ -XXX,XX +XXX,XX @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) | 177 | @@ -XXX,XX +XXX,XX @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) |
100 | if (params->has_mode) { | 178 | if (params->has_direct_io) { |
101 | s->parameters.mode = params->mode; | 179 | s->parameters.direct_io = params->direct_io; |
102 | } | 180 | } |
103 | + | 181 | + if (params->has_accel_path) { |
104 | + if (params->multifd_dsa_accel) { | 182 | + qapi_free_strList(s->parameters.accel_path); |
105 | + g_free(s->parameters.multifd_dsa_accel); | 183 | + s->parameters.has_accel_path = true; |
106 | + assert(params->multifd_dsa_accel->type == QTYPE_QSTRING); | 184 | + s->parameters.accel_path = |
107 | + s->parameters.multifd_dsa_accel = g_strdup(params->multifd_dsa_accel->u.s); | 185 | + QAPI_CLONE(strList, params->accel_path); |
108 | + } | 186 | + } |
109 | } | 187 | } |
110 | 188 | ||
111 | void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) | 189 | void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) |
112 | @@ -XXX,XX +XXX,XX @@ void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) | ||
113 | params->tls_authz->type = QTYPE_QSTRING; | ||
114 | params->tls_authz->u.s = strdup(""); | ||
115 | } | ||
116 | + if (params->multifd_dsa_accel | ||
117 | + && params->multifd_dsa_accel->type == QTYPE_QNULL) { | ||
118 | + qobject_unref(params->multifd_dsa_accel->u.n); | ||
119 | + params->multifd_dsa_accel->type = QTYPE_QSTRING; | ||
120 | + params->multifd_dsa_accel->u.s = strdup(""); | ||
121 | + } | ||
122 | |||
123 | migrate_params_test_apply(params, &tmp); | ||
124 | |||
125 | diff --git a/migration/options.h b/migration/options.h | 190 | diff --git a/migration/options.h b/migration/options.h |
126 | index XXXXXXX..XXXXXXX 100644 | 191 | index XXXXXXX..XXXXXXX 100644 |
127 | --- a/migration/options.h | 192 | --- a/migration/options.h |
128 | +++ b/migration/options.h | 193 | +++ b/migration/options.h |
129 | @@ -XXX,XX +XXX,XX @@ const char *migrate_tls_authz(void); | 194 | @@ -XXX,XX +XXX,XX @@ const char *migrate_tls_creds(void); |
130 | const char *migrate_tls_creds(void); | ||
131 | const char *migrate_tls_hostname(void); | 195 | const char *migrate_tls_hostname(void); |
132 | uint64_t migrate_xbzrle_cache_size(void); | 196 | uint64_t migrate_xbzrle_cache_size(void); |
133 | +const char *migrate_multifd_dsa_accel(void); | 197 | ZeroPageDetection migrate_zero_page_detection(void); |
134 | 198 | +const strList *migrate_accel_path(void); | |
135 | /* parameters setters */ | 199 | |
200 | /* parameters helpers */ | ||
136 | 201 | ||
137 | diff --git a/qapi/migration.json b/qapi/migration.json | 202 | diff --git a/qapi/migration.json b/qapi/migration.json |
138 | index XXXXXXX..XXXXXXX 100644 | 203 | index XXXXXXX..XXXXXXX 100644 |
139 | --- a/qapi/migration.json | 204 | --- a/qapi/migration.json |
140 | +++ b/qapi/migration.json | 205 | +++ b/qapi/migration.json |
141 | @@ -XXX,XX +XXX,XX @@ | 206 | @@ -XXX,XX +XXX,XX @@ |
142 | # @mode: Migration mode. See description in @MigMode. Default is 'normal'. | 207 | # multifd migration is enabled, else in the main migration thread |
143 | # (Since 8.2) | 208 | # as for @legacy. |
144 | # | 209 | # |
145 | +# @multifd-dsa-accel: If enabled, use DSA accelerator offloading for | 210 | +# @dsa-accel: Perform zero page checking with DSA accelerator
146 | +# certain memory operations. (since 8.2) | 211 | +# offloading in the multifd sender thread if multifd migration is
212 | +# enabled, else in the main migration thread as for @legacy. | ||
147 | +# | 213 | +# |
148 | # Features: | 214 | # Since: 9.0 |
215 | ## | ||
216 | { 'enum': 'ZeroPageDetection', | ||
217 | - 'data': [ 'none', 'legacy', 'multifd' ] } | ||
218 | + 'data': [ 'none', 'legacy', 'multifd', 'dsa-accel' ] } | ||
219 | |||
220 | ## | ||
221 | # @BitmapMigrationBitmapAliasTransform: | ||
222 | @@ -XXX,XX +XXX,XX @@ | ||
223 | # See description in @ZeroPageDetection. Default is 'multifd'. | ||
224 | # (since 9.0) | ||
149 | # | 225 | # |
150 | # @deprecated: Member @block-incremental is deprecated. Use | 226 | +# @accel-path: If set, specifies the accelerator paths to be used
227 | +# in QEMU. For example, enable the DSA accelerator for zero page
228 | +# detection offloading by setting @zero-page-detection to dsa-accel
229 | +# and setting @accel-path to "dsa:<dsa_device path>". Defaults to
230 | +# an empty list. (Since 10.0)
231 | +# | ||
232 | # @direct-io: Open migration files with O_DIRECT when possible. This | ||
233 | # only has effect if the @mapped-ram capability is enabled. | ||
234 | # (Since 9.1) | ||
151 | @@ -XXX,XX +XXX,XX @@ | 235 | @@ -XXX,XX +XXX,XX @@ |
152 | 'cpu-throttle-initial', 'cpu-throttle-increment', | 236 | 'cpu-throttle-initial', 'cpu-throttle-increment', |
153 | 'cpu-throttle-tailslow', | 237 | 'cpu-throttle-tailslow', |
154 | 'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth', | 238 | 'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth', |
155 | - 'avail-switchover-bandwidth', 'downtime-limit', | 239 | - 'avail-switchover-bandwidth', 'downtime-limit', |
156 | + 'avail-switchover-bandwidth', 'downtime-limit', 'multifd-dsa-accel', | 240 | + 'avail-switchover-bandwidth', 'downtime-limit', 'accel-path', |
157 | { 'name': 'x-checkpoint-delay', 'features': [ 'unstable' ] }, | 241 | { 'name': 'x-checkpoint-delay', 'features': [ 'unstable' ] }, |
158 | { 'name': 'block-incremental', 'features': [ 'deprecated' ] }, | ||
159 | 'multifd-channels', | 242 | 'multifd-channels', |
160 | @@ -XXX,XX +XXX,XX @@ | 243 | 'xbzrle-cache-size', 'max-postcopy-bandwidth', |
161 | # @mode: Migration mode. See description in @MigMode. Default is 'normal'. | 244 | @@ -XXX,XX +XXX,XX @@ |
162 | # (Since 8.2) | 245 | # See description in @ZeroPageDetection. Default is 'multifd'. |
246 | # (since 9.0) | ||
163 | # | 247 | # |
164 | +# @multifd-dsa-accel: If enabled, use DSA accelerator offloading for | 248 | +# @accel-path: If set, specifies the accelerator paths to be used
165 | +# certain memory operations. (since 8.2) | 249 | +# in QEMU. For example, enable the DSA accelerator for zero page
250 | +# detection offloading by setting @zero-page-detection to dsa-accel
251 | +# and setting @accel-path to "dsa:<dsa_device path>". Defaults to
252 | +# an empty list. (Since 10.0)
166 | +# | 253 | +# |
167 | # Features: | 254 | # @direct-io: Open migration files with O_DIRECT when possible. This |
168 | # | 255 | # only has effect if the @mapped-ram capability is enabled. |
169 | # @deprecated: Member @block-incremental is deprecated. Use | 256 | # (Since 9.1) |
170 | @@ -XXX,XX +XXX,XX @@ | 257 | @@ -XXX,XX +XXX,XX @@ |
171 | '*x-vcpu-dirty-limit-period': { 'type': 'uint64', | ||
172 | 'features': [ 'unstable' ] }, | ||
173 | '*vcpu-dirty-limit': 'uint64', | 258 | '*vcpu-dirty-limit': 'uint64', |
174 | - '*mode': 'MigMode'} } | 259 | '*mode': 'MigMode', |
175 | + '*mode': 'MigMode', | 260 | '*zero-page-detection': 'ZeroPageDetection', |
176 | + '*multifd-dsa-accel': 'StrOrNull'} } | 261 | - '*direct-io': 'bool' } } |
262 | + '*direct-io': 'bool', | ||
263 | + '*accel-path': [ 'str' ] } } | ||
177 | 264 | ||
178 | ## | 265 | ## |
179 | # @migrate-set-parameters: | 266 | # @migrate-set-parameters: |
180 | @@ -XXX,XX +XXX,XX @@ | 267 | @@ -XXX,XX +XXX,XX @@ |
181 | # @mode: Migration mode. See description in @MigMode. Default is 'normal'. | 268 | # See description in @ZeroPageDetection. Default is 'multifd'. |
182 | # (Since 8.2) | 269 | # (since 9.0) |
183 | # | 270 | # |
184 | +# @multifd-dsa-accel: If enabled, use DSA accelerator offloading for | 271 | +# @accel-path: If set, specifies the accelerator paths to be used
185 | +# certain memory operations. (since 8.2) | 272 | +# in QEMU. For example, enable the DSA accelerator for zero page
273 | +# detection offloading by setting @zero-page-detection to dsa-accel
274 | +# and setting @accel-path to "dsa:<dsa_device path>". Defaults to
275 | +# an empty list. (Since 10.0)
186 | +# | 276 | +# |
187 | # Features: | 277 | # @direct-io: Open migration files with O_DIRECT when possible. This |
188 | # | 278 | # only has effect if the @mapped-ram capability is enabled. |
189 | # @deprecated: Member @block-incremental is deprecated. Use | 279 | # (Since 9.1) |
190 | @@ -XXX,XX +XXX,XX @@ | 280 | @@ -XXX,XX +XXX,XX @@ |
191 | '*x-vcpu-dirty-limit-period': { 'type': 'uint64', | ||
192 | 'features': [ 'unstable' ] }, | ||
193 | '*vcpu-dirty-limit': 'uint64', | 281 | '*vcpu-dirty-limit': 'uint64', |
194 | - '*mode': 'MigMode'} } | 282 | '*mode': 'MigMode', |
195 | + '*mode': 'MigMode', | 283 | '*zero-page-detection': 'ZeroPageDetection', |
196 | + '*multifd-dsa-accel': 'str'} } | 284 | - '*direct-io': 'bool' } } |
285 | + '*direct-io': 'bool', | ||
286 | + '*accel-path': [ 'str' ] } } | ||
197 | 287 | ||
198 | ## | 288 | ## |
199 | # @query-migrate-parameters: | 289 | # @query-migrate-parameters: |
200 | diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh | 290 | diff --git a/util/dsa.c b/util/dsa.c |
201 | index XXXXXXX..XXXXXXX 100644 | 291 | index XXXXXXX..XXXXXXX 100644 |
202 | --- a/scripts/meson-buildoptions.sh | 292 | --- a/util/dsa.c |
203 | +++ b/scripts/meson-buildoptions.sh | 293 | +++ b/util/dsa.c |
204 | @@ -XXX,XX +XXX,XX @@ meson_options_help() { | 294 | @@ -XXX,XX +XXX,XX @@ |
205 | printf "%s\n" ' --enable-debug-stack-usage' | 295 | #include "qemu/bswap.h" |
206 | printf "%s\n" ' measure coroutine stack usage' | 296 | #include "qemu/error-report.h" |
207 | printf "%s\n" ' --enable-debug-tcg TCG debugging' | 297 | #include "qemu/rcu.h" |
208 | + printf "%s\n" ' --enable-enqcmd MENQCMD optimizations' | 298 | +#include <cpuid.h> |
209 | printf "%s\n" ' --enable-fdt[=CHOICE] Whether and how to find the libfdt library' | 299 | |
210 | printf "%s\n" ' (choices: auto/disabled/enabled/internal/system)' | 300 | #pragma GCC push_options |
211 | printf "%s\n" ' --enable-fuzzing build fuzzing targets' | 301 | #pragma GCC target("enqcmd") |
212 | @@ -XXX,XX +XXX,XX @@ meson_options_help() { | 302 | @@ -XXX,XX +XXX,XX @@ static void dsa_completion_thread_stop(void *opaque) |
213 | printf "%s\n" ' avx2 AVX2 optimizations' | 303 | qemu_sem_destroy(&thread_context->sem_init_done); |
214 | printf "%s\n" ' avx512bw AVX512BW optimizations' | 304 | } |
215 | printf "%s\n" ' avx512f AVX512F optimizations' | 305 | |
216 | - printf "%s\n" ' enqcmd ENQCMD optimizations' | 306 | +/** |
217 | printf "%s\n" ' blkio libblkio block device driver' | 307 | + * @brief Check if DSA is supported. |
218 | printf "%s\n" ' bochs bochs image format support' | 308 | + * |
219 | printf "%s\n" ' bpf eBPF support' | 309 | + * @return True if DSA is supported, otherwise false. |
220 | @@ -XXX,XX +XXX,XX @@ _meson_option_parse() { | 310 | + */ |
221 | --disable-avx512bw) printf "%s" -Davx512bw=disabled ;; | 311 | +bool qemu_dsa_is_supported(void) |
222 | --enable-avx512f) printf "%s" -Davx512f=enabled ;; | 312 | +{ |
223 | --disable-avx512f) printf "%s" -Davx512f=disabled ;; | 313 | + /* |
224 | - --enable-enqcmd) printf "%s" -Denqcmd=true ;; | 314 | + * movdir64b is indicated by bit 28 of ecx in CPUID leaf 7, subleaf 0. |
225 | - --disable-enqcmd) printf "%s" -Denqcmd=false ;; | 315 | + * enqcmd is indicated by bit 29 of ecx in CPUID leaf 7, subleaf 0. |
226 | --enable-gcov) printf "%s" -Db_coverage=true ;; | 316 | + * Doc: https://cdrdv2-public.intel.com/819680/architecture-instruction-\ |
227 | --disable-gcov) printf "%s" -Db_coverage=false ;; | 317 | + * set-extensions-programming-reference.pdf |
228 | --enable-lto) printf "%s" -Db_lto=true ;; | 318 | + */ |
229 | @@ -XXX,XX +XXX,XX @@ _meson_option_parse() { | 319 | + uint32_t eax, ebx, ecx, edx; |
230 | --disable-docs) printf "%s" -Ddocs=disabled ;; | 320 | + bool movedirb_enabled; |
231 | --enable-dsound) printf "%s" -Ddsound=enabled ;; | 321 | + bool enqcmd_enabled; |
232 | --disable-dsound) printf "%s" -Ddsound=disabled ;; | 322 | + |
233 | + --enable-enqcmd) printf "%s" -Denqcmd=true ;; | 323 | + __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx); |
234 | + --disable-enqcmd) printf "%s" -Denqcmd=false ;; | 324 | + movedirb_enabled = (ecx >> 28) & 0x1; |
235 | --enable-fdt) printf "%s" -Dfdt=enabled ;; | 325 | + if (!movedirb_enabled) { |
236 | --disable-fdt) printf "%s" -Dfdt=disabled ;; | 326 | + return false; |
237 | --enable-fdt=*) quote_sh "-Dfdt=$2" ;; | 327 | + } |
328 | + enqcmd_enabled = (ecx >> 29) & 0x1; | ||
329 | + if (!enqcmd_enabled) { | ||
330 | + return false; | ||
331 | + } | ||
332 | + | ||
333 | + return true; | ||
334 | +} | ||
335 | + | ||
336 | /** | ||
337 | * @brief Check if DSA is running. | ||
338 | * | ||
238 | -- | 339 | -- |
239 | 2.30.2 | 340 | Yichen Wang
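To illustrate the accel-path parameter added above, a possible HMP session is sketched below; the work queue paths are only examples and depend on how the host's DSA work queues were configured (e.g. with the accel-config tool):

  (qemu) migrate_set_parameter zero-page-detection dsa-accel
  (qemu) migrate_set_parameter accel-path dsa:/dev/dsa/wq4.0 dsa:/dev/dsa/wq4.1
  (qemu) info migrate_parameters

In HMP the value is a space-separated list, matching the g_strsplit() parsing added to hmp_migrate_set_parameter() above.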
1 | From: Hao Xiang <hao.xiang@linux.dev> | ||
---|---|---|---|
2 | |||
1 | Multifd sender path gets an array of pages queued by the migration | 3 | Multifd sender path gets an array of pages queued by the migration |
2 | thread. It performs zero page checking on every page in the array. | 4 | thread. It performs zero page checking on every page in the array. |
3 | The pages are classified as either a zero page or a normal page. This | 5 | The pages are classified as either a zero page or a normal page. This
4 | change uses Intel DSA to offload the zero page checking from the CPU to | 6 | change uses Intel DSA to offload the zero page checking from the CPU to
5 | the DSA accelerator. The sender thread submits a batch of pages to DSA | 7 | the DSA accelerator. The sender thread submits a batch of pages to DSA |
6 | hardware and waits for the DSA completion thread to signal for work | 8 | hardware and waits for the DSA completion thread to signal for work |
7 | completion. | 9 | completion. |
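As a usage sketch (the device path below is only an example and depends on the host's DSA work queue setup), the offload is enabled by switching zero-page-detection to dsa-accel and pointing accel-path at a DSA work queue:

  {"execute": "migrate-set-parameters",
   "arguments": {"zero-page-detection": "dsa-accel",
                 "accel-path": ["dsa:/dev/dsa/wq4.0"]}}

When DSA is not initialized or not running, multifd_send_zero_page_detect() falls back to the CPU path in zero_page_detect_cpu().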
8 | 10 | ||
9 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | 11 | Signed-off-by: Hao Xiang <hao.xiang@linux.dev> |
12 | Signed-off-by: Yichen Wang <yichen.wang@bytedance.com> | ||
10 | --- | 13 | --- |
11 | migration/multifd.c | 33 ++++++++++++++++++++++++++++++++- | 14 | migration/multifd-zero-page.c | 149 ++++++++++++++++++++++++++++++---- |
12 | 1 file changed, 32 insertions(+), 1 deletion(-) | 15 | migration/multifd.c | 23 +++++- |
16 | migration/multifd.h | 6 ++ | ||
17 | migration/options.c | 13 +++ | ||
18 | migration/options.h | 1 + | ||
19 | 5 files changed, 176 insertions(+), 16 deletions(-) | ||
13 | 20 | ||
21 | diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/migration/multifd-zero-page.c | ||
24 | +++ b/migration/multifd-zero-page.c | ||
25 | @@ -XXX,XX +XXX,XX @@ | ||
26 | |||
27 | static bool multifd_zero_page_enabled(void) | ||
28 | { | ||
29 | - return migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD; | ||
30 | + ZeroPageDetection curMethod = migrate_zero_page_detection(); | ||
31 | + return (curMethod == ZERO_PAGE_DETECTION_MULTIFD || | ||
32 | + curMethod == ZERO_PAGE_DETECTION_DSA_ACCEL); | ||
33 | } | ||
34 | |||
35 | static void swap_page_offset(ram_addr_t *pages_offset, int a, int b) | ||
36 | @@ -XXX,XX +XXX,XX @@ static void swap_page_offset(ram_addr_t *pages_offset, int a, int b) | ||
37 | pages_offset[b] = temp; | ||
38 | } | ||
39 | |||
40 | +#ifdef CONFIG_DSA_OPT | ||
41 | + | ||
42 | +static void swap_result(bool *results, int a, int b) | ||
43 | +{ | ||
44 | + bool temp; | ||
45 | + | ||
46 | + if (a == b) { | ||
47 | + return; | ||
48 | + } | ||
49 | + | ||
50 | + temp = results[a]; | ||
51 | + results[a] = results[b]; | ||
52 | + results[b] = temp; | ||
53 | +} | ||
54 | + | ||
55 | /** | ||
56 | - * multifd_send_zero_page_detect: Perform zero page detection on all pages. | ||
57 | + * zero_page_detect_dsa: Perform zero page detection using | ||
58 | + * Intel Data Streaming Accelerator (DSA). | ||
59 | * | ||
60 | - * Sorts normal pages before zero pages in p->pages->offset and updates | ||
61 | - * p->pages->normal_num. | ||
62 | + * Sorts normal pages before zero pages in pages->offset and updates | ||
63 | + * pages->normal_num. | ||
64 | * | ||
65 | * @param p A pointer to the send params. | ||
66 | */ | ||
67 | -void multifd_send_zero_page_detect(MultiFDSendParams *p) | ||
68 | +static void zero_page_detect_dsa(MultiFDSendParams *p) | ||
69 | { | ||
70 | MultiFDPages_t *pages = &p->data->u.ram; | ||
71 | RAMBlock *rb = pages->block; | ||
72 | - int i = 0; | ||
73 | - int j = pages->num - 1; | ||
74 | + bool *results = p->dsa_batch_task->results; | ||
75 | |||
76 | - if (!multifd_zero_page_enabled()) { | ||
77 | - pages->normal_num = pages->num; | ||
78 | - goto out; | ||
79 | + for (int i = 0; i < pages->num; i++) { | ||
80 | + p->dsa_batch_task->addr[i] = | ||
81 | + (ram_addr_t)(rb->host + pages->offset[i]); | ||
82 | } | ||
83 | |||
84 | + buffer_is_zero_dsa_batch_sync(p->dsa_batch_task, | ||
85 | + (const void **)p->dsa_batch_task->addr, | ||
86 | + pages->num, | ||
87 | + multifd_ram_page_size()); | ||
88 | + | ||
89 | + int i = 0; | ||
90 | + int j = pages->num - 1; | ||
91 | + | ||
92 | /* | ||
93 | * Sort the page offset array by moving all normal pages to | ||
94 | * the left and all zero pages to the right of the array. | ||
95 | @@ -XXX,XX +XXX,XX @@ void multifd_send_zero_page_detect(MultiFDSendParams *p) | ||
96 | while (i <= j) { | ||
97 | uint64_t offset = pages->offset[i]; | ||
98 | |||
99 | - if (!buffer_is_zero(rb->host + offset, multifd_ram_page_size())) { | ||
100 | + if (!results[i]) { | ||
101 | i++; | ||
102 | continue; | ||
103 | } | ||
104 | |||
105 | + swap_result(results, i, j); | ||
106 | swap_page_offset(pages->offset, i, j); | ||
107 | ram_release_page(rb->idstr, offset); | ||
108 | j--; | ||
109 | } | ||
110 | |||
111 | pages->normal_num = i; | ||
112 | +} | ||
113 | |||
114 | -out: | ||
115 | - stat64_add(&mig_stats.normal_pages, pages->normal_num); | ||
116 | - stat64_add(&mig_stats.zero_pages, pages->num - pages->normal_num); | ||
117 | +int multifd_dsa_setup(MigrationState *s, Error *local_err) | ||
118 | +{ | ||
119 | + g_autofree strList *dsa_parameter = g_malloc0(sizeof(strList)); | ||
120 | + migrate_dsa_accel_path(&dsa_parameter); | ||
121 | + if (qemu_dsa_init(dsa_parameter, &local_err)) { | ||
122 | + migrate_set_error(s, local_err); | ||
123 | + return -1; | ||
124 | + } else { | ||
125 | + qemu_dsa_start(); | ||
126 | + } | ||
127 | + | ||
128 | + return 0; | ||
129 | +} | ||
130 | + | ||
131 | +void multifd_dsa_cleanup(void) | ||
132 | +{ | ||
133 | + qemu_dsa_cleanup(); | ||
134 | +} | ||
135 | + | ||
136 | +#else | ||
137 | + | ||
138 | +static void zero_page_detect_dsa(MultiFDSendParams *p) | ||
139 | +{ | ||
140 | + g_assert_not_reached(); | ||
141 | } | ||
142 | |||
143 | +int multifd_dsa_setup(MigrationState *s, Error *local_err) | ||
144 | +{ | ||
145 | + g_assert_not_reached(); | ||
146 | + return -1; | ||
147 | +} | ||
148 | + | ||
149 | +void multifd_dsa_cleanup(void) | ||
150 | +{ | ||
151 | +    return;
152 | +} | ||
153 | + | ||
154 | +#endif | ||
155 | + | ||
156 | void multifd_recv_zero_page_process(MultiFDRecvParams *p) | ||
157 | { | ||
158 | for (int i = 0; i < p->zero_num; i++) { | ||
159 | @@ -XXX,XX +XXX,XX @@ void multifd_recv_zero_page_process(MultiFDRecvParams *p) | ||
160 | } | ||
161 | } | ||
162 | } | ||
163 | + | ||
164 | +/** | ||
165 | + * zero_page_detect_cpu: Perform zero page detection using CPU. | ||
166 | + * | ||
167 | + * Sorts normal pages before zero pages in p->pages->offset and updates | ||
168 | + * p->pages->normal_num. | ||
169 | + * | ||
170 | + * @param p A pointer to the send params. | ||
171 | + */ | ||
172 | +static void zero_page_detect_cpu(MultiFDSendParams *p) | ||
173 | +{ | ||
174 | + MultiFDPages_t *pages = &p->data->u.ram; | ||
175 | + RAMBlock *rb = pages->block; | ||
176 | + int i = 0; | ||
177 | + int j = pages->num - 1; | ||
178 | + | ||
179 | + /* | ||
180 | + * Sort the page offset array by moving all normal pages to | ||
181 | + * the left and all zero pages to the right of the array. | ||
182 | + */ | ||
183 | + while (i <= j) { | ||
184 | + uint64_t offset = pages->offset[i]; | ||
185 | + | ||
186 | + if (!buffer_is_zero(rb->host + offset, multifd_ram_page_size())) { | ||
187 | + i++; | ||
188 | + continue; | ||
189 | + } | ||
190 | + | ||
191 | + swap_page_offset(pages->offset, i, j); | ||
192 | + ram_release_page(rb->idstr, offset); | ||
193 | + j--; | ||
194 | + } | ||
195 | + | ||
196 | + pages->normal_num = i; | ||
197 | +} | ||
198 | + | ||
199 | +/** | ||
200 | + * multifd_send_zero_page_detect: Perform zero page detection on all pages. | ||
201 | + * | ||
202 | + * @param p A pointer to the send params. | ||
203 | + */ | ||
204 | +void multifd_send_zero_page_detect(MultiFDSendParams *p) | ||
205 | +{ | ||
206 | + MultiFDPages_t *pages = &p->data->u.ram; | ||
207 | + | ||
208 | + if (!multifd_zero_page_enabled()) { | ||
209 | + pages->normal_num = pages->num; | ||
210 | + goto out; | ||
211 | + } | ||
212 | + | ||
213 | + if (qemu_dsa_is_running()) { | ||
214 | + zero_page_detect_dsa(p); | ||
215 | + } else { | ||
216 | + zero_page_detect_cpu(p); | ||
217 | + } | ||
218 | + | ||
219 | +out: | ||
220 | + stat64_add(&mig_stats.normal_pages, pages->normal_num); | ||
221 | + stat64_add(&mig_stats.zero_pages, pages->num - pages->normal_num); | ||
222 | +} | ||
14 | diff --git a/migration/multifd.c b/migration/multifd.c | 223 | diff --git a/migration/multifd.c b/migration/multifd.c |
15 | index XXXXXXX..XXXXXXX 100644 | 224 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/migration/multifd.c | 225 | --- a/migration/multifd.c |
17 | +++ b/migration/multifd.c | 226 | +++ b/migration/multifd.c |
18 | @@ -XXX,XX +XXX,XX @@ void multifd_save_cleanup(void) | 227 | @@ -XXX,XX +XXX,XX @@ |
19 | qemu_thread_join(&p->thread); | 228 | #include "qemu/osdep.h" |
20 | } | 229 | #include "qemu/cutils.h" |
21 | } | 230 | #include "qemu/rcu.h" |
22 | + dsa_stop(); | 231 | +#include "qemu/dsa.h" |
23 | + dsa_cleanup(); | 232 | #include "exec/target_page.h" |
233 | #include "system/system.h" | ||
234 | #include "exec/ramblock.h" | ||
235 | @@ -XXX,XX +XXX,XX @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) | ||
236 | p->name = NULL; | ||
237 | g_free(p->data); | ||
238 | p->data = NULL; | ||
239 | + buffer_zero_batch_task_destroy(p->dsa_batch_task); | ||
240 | + p->dsa_batch_task = NULL; | ||
241 | p->packet_len = 0; | ||
242 | g_free(p->packet); | ||
243 | p->packet = NULL; | ||
244 | @@ -XXX,XX +XXX,XX @@ void multifd_send_shutdown(void) | ||
245 | |||
246 | multifd_send_terminate_threads(); | ||
247 | |||
248 | + multifd_dsa_cleanup(); | ||
249 | + | ||
24 | for (i = 0; i < migrate_multifd_channels(); i++) { | 250 | for (i = 0; i < migrate_multifd_channels(); i++) { |
25 | MultiFDSendParams *p = &multifd_send_state->params[i]; | 251 | MultiFDSendParams *p = &multifd_send_state->params[i]; |
26 | Error *local_err = NULL; | 252 | Error *local_err = NULL; |
27 | @@ -XXX,XX +XXX,XX @@ static void buffer_is_zero_use_cpu(MultiFDSendParams *p) | 253 | @@ -XXX,XX +XXX,XX @@ bool multifd_send_setup(void) |
28 | { | 254 | uint32_t page_count = multifd_ram_page_count(); |
29 | const void **buf = (const void **)p->addr; | 255 | bool use_packets = multifd_use_packets(); |
30 | assert(!migrate_use_main_zero_page()); | ||
31 | + assert(!dsa_is_running()); | ||
32 | |||
33 | for (int i = 0; i < p->pages->num; i++) { | ||
34 | p->batch_task->results[i] = buffer_is_zero(buf[i], p->page_size); | ||
35 | @@ -XXX,XX +XXX,XX @@ static void buffer_is_zero_use_cpu(MultiFDSendParams *p) | ||
36 | |||
37 | static void set_normal_pages(MultiFDSendParams *p) | ||
38 | { | ||
39 | + assert(migrate_use_main_zero_page()); | ||
40 | + | ||
41 | for (int i = 0; i < p->pages->num; i++) { | ||
42 | p->batch_task->results[i] = false; | ||
43 | } | ||
44 | } | ||
45 | |||
46 | +static void buffer_is_zero_use_dsa(MultiFDSendParams *p) | ||
47 | +{ | ||
48 | + assert(!migrate_use_main_zero_page()); | ||
49 | + assert(dsa_is_running()); | ||
50 | + | ||
51 | + buffer_is_zero_dsa_batch_async(p->batch_task, | ||
52 | + (const void **)p->addr, | ||
53 | + p->pages->num, | ||
54 | + p->page_size); | ||
55 | +} | ||
56 | + | ||
57 | static void multifd_zero_page_check(MultiFDSendParams *p) | ||
58 | { | ||
59 | /* older qemu don't understand zero page on multifd channel */ | ||
60 | bool use_multifd_zero_page = !migrate_use_main_zero_page(); | ||
61 | + bool use_multifd_dsa_accel = dsa_is_running(); | ||
62 | |||
63 | RAMBlock *rb = p->pages->block; | ||
64 | |||
65 | @@ -XXX,XX +XXX,XX @@ static void multifd_zero_page_check(MultiFDSendParams *p) | ||
66 | p->addr[i] = (ram_addr_t)(rb->host + p->pages->offset[i]); | ||
67 | } | ||
68 | |||
69 | - if (use_multifd_zero_page) { | ||
70 | + if (use_multifd_dsa_accel && use_multifd_zero_page) { | ||
71 | + buffer_is_zero_use_dsa(p); | ||
72 | + } else if (use_multifd_zero_page) { | ||
73 | buffer_is_zero_use_cpu(p); | ||
74 | } else { | ||
75 | // No zero page checking. All pages are normal pages. | ||
76 | @@ -XXX,XX +XXX,XX @@ int multifd_save_setup(Error **errp) | ||
77 | int thread_count; | ||
78 | uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); | ||
79 | uint8_t i; | 256 | uint8_t i; |
80 | + const char *dsa_parameter = migrate_multifd_dsa_accel(); | 257 | + Error *local_err = NULL; |
81 | 258 | ||
82 | if (!migrate_multifd()) { | 259 | if (!migrate_multifd()) { |
83 | return 0; | 260 | return true; |
84 | } | 261 | @@ -XXX,XX +XXX,XX @@ bool multifd_send_setup(void) |
85 | 262 | qatomic_set(&multifd_send_state->exiting, 0); | |
86 | + dsa_init(dsa_parameter); | 263 | multifd_send_state->ops = multifd_ops[migrate_multifd_compression()]; |
87 | + dsa_start(); | 264 | |
88 | + | 265 | + if (ret) { |
89 | thread_count = migrate_multifd_channels(); | 266 | + goto err; |
90 | multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); | 267 | + } |
91 | multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); | 268 | + |
92 | @@ -XXX,XX +XXX,XX @@ int multifd_save_setup(Error **errp) | 269 | for (i = 0; i < thread_count; i++) { |
93 | return ret; | 270 | MultiFDSendParams *p = &multifd_send_state->params[i]; |
271 | - Error *local_err = NULL; | ||
272 | |||
273 | qemu_sem_init(&p->sem, 0); | ||
274 | qemu_sem_init(&p->sem_sync, 0); | ||
275 | @@ -XXX,XX +XXX,XX @@ bool multifd_send_setup(void) | ||
276 | goto err; | ||
277 | } | ||
278 | |||
279 | + if (s && ret == 0 && | ||
280 | + s->parameters.zero_page_detection == ZERO_PAGE_DETECTION_DSA_ACCEL) { | ||
281 | + ret = multifd_dsa_setup(s, local_err); | ||
282 | + } | ||
283 | + | ||
284 | + if (ret) { | ||
285 | + goto err; | ||
286 | + } | ||
287 | + | ||
288 | for (i = 0; i < thread_count; i++) { | ||
289 | MultiFDSendParams *p = &multifd_send_state->params[i]; | ||
290 | - Error *local_err = NULL; | ||
291 | |||
292 | + p->dsa_batch_task = buffer_zero_batch_task_init(page_count); | ||
293 | ret = multifd_send_state->ops->send_setup(p, &local_err); | ||
294 | if (ret) { | ||
295 | migrate_set_error(s, local_err); | ||
296 | @@ -XXX,XX +XXX,XX @@ void multifd_recv_cleanup(void) | ||
297 | qemu_thread_join(&p->thread); | ||
94 | } | 298 | } |
95 | } | 299 | } |
96 | + | 300 | + multifd_dsa_cleanup(); |
97 | return 0; | ||
98 | } | ||
99 | |||
100 | @@ -XXX,XX +XXX,XX @@ void multifd_load_cleanup(void) | ||
101 | |||
102 | qemu_thread_join(&p->thread); | ||
103 | } | ||
104 | + dsa_stop(); | ||
105 | + dsa_cleanup(); | ||
106 | for (i = 0; i < migrate_multifd_channels(); i++) { | 301 | for (i = 0; i < migrate_multifd_channels(); i++) { |
107 | MultiFDRecvParams *p = &multifd_recv_state->params[i]; | 302 | multifd_recv_cleanup_channel(&multifd_recv_state->params[i]); |
108 | 303 | } | |
109 | @@ -XXX,XX +XXX,XX @@ int multifd_load_setup(Error **errp) | 304 | diff --git a/migration/multifd.h b/migration/multifd.h |
110 | int thread_count; | 305 | index XXXXXXX..XXXXXXX 100644 |
111 | uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); | 306 | --- a/migration/multifd.h |
112 | uint8_t i; | 307 | +++ b/migration/multifd.h |
113 | + const char *dsa_parameter = migrate_multifd_dsa_accel(); | 308 | @@ -XXX,XX +XXX,XX @@ |
114 | 309 | ||
115 | /* | 310 | #include "exec/target_page.h" |
116 | * Return successfully if multiFD recv state is already initialised | 311 | #include "ram.h" |
117 | @@ -XXX,XX +XXX,XX @@ int multifd_load_setup(Error **errp) | 312 | +#include "qemu/dsa.h" |
118 | return 0; | 313 | |
119 | } | 314 | typedef struct MultiFDRecvData MultiFDRecvData; |
120 | 315 | typedef struct MultiFDSendData MultiFDSendData; | |
121 | + dsa_init(dsa_parameter); | 316 | @@ -XXX,XX +XXX,XX @@ typedef struct { |
122 | + dsa_start(); | 317 | bool pending_sync; |
123 | + | 318 | MultiFDSendData *data; |
124 | thread_count = migrate_multifd_channels(); | 319 | |
125 | multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state)); | 320 | + /* Zero page checking batch task */ |
126 | multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count); | 321 | + QemuDsaBatchTask *dsa_batch_task; |
127 | @@ -XXX,XX +XXX,XX @@ int multifd_load_setup(Error **errp) | 322 | + |
128 | return ret; | 323 | /* thread local variables. No locking required */ |
129 | } | 324 | |
130 | } | 325 | /* pointer to the packet */ |
131 | + | 326 | @@ -XXX,XX +XXX,XX @@ void multifd_send_fill_packet(MultiFDSendParams *p); |
132 | return 0; | 327 | bool multifd_send_prepare_common(MultiFDSendParams *p); |
133 | } | 328 | void multifd_send_zero_page_detect(MultiFDSendParams *p); |
329 | void multifd_recv_zero_page_process(MultiFDRecvParams *p); | ||
330 | +int multifd_dsa_setup(MigrationState *s, Error *local_err); | ||
331 | +void multifd_dsa_cleanup(void); | ||
332 | |||
333 | static inline void multifd_send_prepare_header(MultiFDSendParams *p) | ||
334 | { | ||
335 | diff --git a/migration/options.c b/migration/options.c | ||
336 | index XXXXXXX..XXXXXXX 100644 | ||
337 | --- a/migration/options.c | ||
338 | +++ b/migration/options.c | ||
339 | @@ -XXX,XX +XXX,XX @@ const strList *migrate_accel_path(void) | ||
340 | return s->parameters.accel_path; | ||
341 | } | ||
342 | |||
343 | +void migrate_dsa_accel_path(strList **dsa_accel_path) | ||
344 | +{ | ||
345 | + MigrationState *s = migrate_get_current(); | ||
346 | + strList *accel_path = s->parameters.accel_path; | ||
347 | + strList **tail = dsa_accel_path; | ||
348 | + while (accel_path) { | ||
349 | + if (strncmp(accel_path->value, "dsa:", 4) == 0) { | ||
350 | + QAPI_LIST_APPEND(tail, &accel_path->value[4]); | ||
351 | + } | ||
352 | + accel_path = accel_path->next; | ||
353 | + } | ||
354 | +} | ||
355 | + | ||
356 | const char *migrate_tls_hostname(void) | ||
357 | { | ||
358 | MigrationState *s = migrate_get_current(); | ||
359 | diff --git a/migration/options.h b/migration/options.h | ||
360 | index XXXXXXX..XXXXXXX 100644 | ||
361 | --- a/migration/options.h | ||
362 | +++ b/migration/options.h | ||
363 | @@ -XXX,XX +XXX,XX @@ const char *migrate_tls_hostname(void); | ||
364 | uint64_t migrate_xbzrle_cache_size(void); | ||
365 | ZeroPageDetection migrate_zero_page_detection(void); | ||
366 | const strList *migrate_accel_path(void); | ||
367 | +void migrate_dsa_accel_path(strList **dsa_accel_path); | ||
368 | |||
369 | /* parameters helpers */ | ||
134 | 370 | ||
135 | -- | 371 | -- |
136 | 2.30.2 | 372 | Yichen Wang
1 | From: Hao Xiang <hao.xiang@linux.dev> | ||
---|---|---|---|
2 | |||
1 | * Test DSA start and stop path. | 3 | * Test DSA start and stop path. |
2 | * Test DSA configure and cleanup path. | 4 | * Test DSA configure and cleanup path. |
3 | * Test DSA task submission and completion path. | 5 | * Test DSA task submission and completion path. |
4 | 6 | ||
5 | Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com> | 7 | Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com> |
6 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | 8 | Signed-off-by: Hao Xiang <hao.xiang@linux.dev> |
9 | Signed-off-by: Yichen Wang <yichen.wang@bytedance.com> | ||
7 | --- | 10 | --- |
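Note on running the new test (a sketch of the usual flow, assuming a build with CONFIG_DSA_OPT enabled and a host that exposes the hard-coded work queues /dev/dsa/wq4.0 and /dev/dsa/wq4.1; run from the build directory):

  $ make check-unit
  $ ./tests/unit/test-dsa

Without those work queues the qemu_dsa_init() asserts in the test will fail, so the test is only meaningful on DSA-capable hosts.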
8 | tests/unit/meson.build | 6 + | 11 | tests/unit/meson.build | 6 + |
9 | tests/unit/test-dsa.c | 466 +++++++++++++++++++++++++++++++++++++++++ | 12 | tests/unit/test-dsa.c | 504 +++++++++++++++++++++++++++++++++++++++++ |
10 | 2 files changed, 472 insertions(+) | 13 | 2 files changed, 510 insertions(+) |
11 | create mode 100644 tests/unit/test-dsa.c | 14 | create mode 100644 tests/unit/test-dsa.c |
12 | 15 | ||
13 | diff --git a/tests/unit/meson.build b/tests/unit/meson.build | 16 | diff --git a/tests/unit/meson.build b/tests/unit/meson.build |
14 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tests/unit/meson.build | 18 | --- a/tests/unit/meson.build |
16 | +++ b/tests/unit/meson.build | 19 | +++ b/tests/unit/meson.build |
17 | @@ -XXX,XX +XXX,XX @@ tests = { | 20 | @@ -XXX,XX +XXX,XX @@ tests = { |
18 | 'test-virtio-dmabuf': [meson.project_source_root() / 'hw/display/virtio-dmabuf.c'], | 21 | 'test-fifo': [], |
19 | } | 22 | } |
20 | 23 | ||
21 | +if config_host_data.get('CONFIG_DSA_OPT') | 24 | +if config_host_data.get('CONFIG_DSA_OPT') |
22 | + tests += { | 25 | + tests += { |
23 | + 'test-dsa': [], | 26 | + 'test-dsa': [], |
... | ... | ||
34 | +++ b/tests/unit/test-dsa.c | 37 | +++ b/tests/unit/test-dsa.c |
35 | @@ -XXX,XX +XXX,XX @@ | 38 | @@ -XXX,XX +XXX,XX @@ |
36 | +/* | 39 | +/* |
37 | + * Test DSA functions. | 40 | + * Test DSA functions. |
38 | + * | 41 | + * |
39 | + * Copyright (c) 2023 Hao Xiang <hao.xiang@bytedance.com> | 42 | + * Copyright (C) Bytedance Ltd. |
40 | + * Copyright (c) 2023 Bryan Zhang <bryan.zhang@bytedance.com> | ||
41 | + * | 43 | + * |
42 | + * This library is free software; you can redistribute it and/or | 44 | + * Authors: |
43 | + * modify it under the terms of the GNU Lesser General Public | 45 | + * Hao Xiang <hao.xiang@bytedance.com> |
44 | + * License as published by the Free Software Foundation; either | 46 | + * Bryan Zhang <bryan.zhang@bytedance.com> |
45 | + * version 2.1 of the License, or (at your option) any later version. | 47 | + * Yichen Wang <yichen.wang@bytedance.com> |
46 | + * | 48 | + * |
47 | + * This library is distributed in the hope that it will be useful, | 49 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. |
48 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | 50 | + * See the COPYING file in the top-level directory. |
49 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
50 | + * Lesser General Public License for more details. | ||
51 | + * | ||
52 | + * You should have received a copy of the GNU Lesser General Public | ||
53 | + * License along with this library; if not, see <http://www.gnu.org/licenses/>. | ||
54 | + */ | 51 | + */ |
52 | + | ||
55 | +#include "qemu/osdep.h" | 53 | +#include "qemu/osdep.h" |
56 | +#include "qemu/host-utils.h" | 54 | +#include "qemu/host-utils.h" |
57 | + | 55 | + |
58 | +#include "qemu/cutils.h" | 56 | +#include "qemu/cutils.h" |
59 | +#include "qemu/memalign.h" | 57 | +#include "qemu/memalign.h" |
60 | +#include "qemu/dsa.h" | 58 | +#include "qemu/dsa.h" |
61 | + | 59 | + |
62 | +// TODO Make these not-hardcoded. | 60 | +/* |
63 | +static const char *path1 = "/dev/dsa/wq4.0"; | 61 | + * TODO Communicate that DSA must be configured to support this batch size. |
64 | +static const char *path2 = "/dev/dsa/wq4.0 /dev/dsa/wq4.1"; | 62 | + * TODO Alternatively, poke the DSA device to figure out batch size. |
65 | +static const int num_devices = 2; | 63 | + */ |
66 | + | 64 | +#define batch_size 128 |
67 | +static struct buffer_zero_batch_task batch_task __attribute__((aligned(64))); | 65 | +#define page_size 4096 |
68 | + | 66 | + |
69 | +// TODO Communicate that DSA must be configured to support this batch size. | 67 | +#define oversized_batch_size (batch_size + 1) |
70 | +// TODO Alternatively, poke the DSA device to figure out batch size. | 68 | +#define num_devices 2 |
71 | +static int batch_size = 128; | 69 | +#define max_buffer_size (64 * 1024) |
72 | +static int page_size = 4096; | 70 | + |
73 | + | 71 | +/* TODO Make these not-hardcoded. */ |
74 | +// A helper for running a single task and checking for correctness. | 72 | +static const strList path1[] = { |
73 | + {.value = (char *)"/dev/dsa/wq4.0", .next = NULL} | ||
74 | +}; | ||
75 | +static const strList path2[] = { | ||
76 | + {.value = (char *)"/dev/dsa/wq4.0", .next = (strList*)&path2[1]}, | ||
77 | + {.value = (char *)"/dev/dsa/wq4.1", .next = NULL} | ||
78 | +}; | ||
79 | + | ||
80 | +static Error **errp; | ||
81 | + | ||
82 | +static QemuDsaBatchTask *task; | ||
83 | + | ||
84 | +/* A helper for running a single task and checking for correctness. */ | ||
75 | +static void do_single_task(void) | 85 | +static void do_single_task(void) |
76 | +{ | 86 | +{ |
77 | + buffer_zero_batch_task_init(&batch_task, batch_size); | 87 | + task = buffer_zero_batch_task_init(batch_size); |
78 | + char buf[page_size]; | 88 | + char buf[page_size]; |
79 | + char* ptr = buf; | 89 | + char *ptr = buf; |
80 | + | 90 | + |
81 | + buffer_is_zero_dsa_batch_async(&batch_task, | 91 | + buffer_is_zero_dsa_batch_sync(task, |
82 | + (const void**) &ptr, | 92 | + (const void **)&ptr, |
83 | + 1, | 93 | + 1, |
84 | + page_size); | 94 | + page_size); |
85 | + g_assert(batch_task.results[0] == buffer_is_zero(buf, page_size)); | 95 | + g_assert(task->results[0] == buffer_is_zero(buf, page_size)); |
96 | + | ||
97 | + buffer_zero_batch_task_destroy(task); | ||
86 | +} | 98 | +} |
87 | + | 99 | + |
88 | +static void test_single_zero(void) | 100 | +static void test_single_zero(void) |
89 | +{ | 101 | +{ |
90 | + g_assert(!dsa_init(path1)); | 102 | + g_assert(!qemu_dsa_init(path1, errp)); |
91 | + dsa_start(); | 103 | + qemu_dsa_start(); |
92 | + | 104 | + |
93 | + buffer_zero_batch_task_init(&batch_task, batch_size); | 105 | + task = buffer_zero_batch_task_init(batch_size); |
94 | + | 106 | + |
95 | + char buf[page_size]; | 107 | + char buf[page_size]; |
96 | + char* ptr = buf; | 108 | + char *ptr = buf; |
97 | + | 109 | + |
98 | + memset(buf, 0x0, page_size); | 110 | + memset(buf, 0x0, page_size); |
99 | + buffer_is_zero_dsa_batch_async(&batch_task, | 111 | + buffer_is_zero_dsa_batch_sync(task, |
100 | + (const void**) &ptr, | 112 | + (const void **)&ptr, |
101 | + 1, page_size); | 113 | + 1, page_size); |
102 | + g_assert(batch_task.results[0]); | 114 | + g_assert(task->results[0]); |
103 | + | 115 | + |
104 | + dsa_cleanup(); | 116 | + buffer_zero_batch_task_destroy(task); |
117 | + | ||
118 | + qemu_dsa_cleanup(); | ||
105 | +} | 119 | +} |
106 | + | 120 | + |
107 | +static void test_single_zero_async(void) | 121 | +static void test_single_zero_async(void) |
108 | +{ | 122 | +{ |
109 | + test_single_zero(); | 123 | + test_single_zero(); |
110 | +} | 124 | +} |
111 | + | 125 | + |
112 | +static void test_single_nonzero(void) | 126 | +static void test_single_nonzero(void) |
113 | +{ | 127 | +{ |
114 | + g_assert(!dsa_init(path1)); | 128 | + g_assert(!qemu_dsa_init(path1, errp)); |
115 | + dsa_start(); | 129 | + qemu_dsa_start(); |
116 | + | 130 | + |
117 | + buffer_zero_batch_task_init(&batch_task, batch_size); | 131 | + task = buffer_zero_batch_task_init(batch_size); |
118 | + | 132 | + |
119 | + char buf[page_size]; | 133 | + char buf[page_size]; |
120 | + char* ptr = buf; | 134 | + char *ptr = buf; |
121 | + | 135 | + |
122 | + memset(buf, 0x1, page_size); | 136 | + memset(buf, 0x1, page_size); |
123 | + buffer_is_zero_dsa_batch_async(&batch_task, | 137 | + buffer_is_zero_dsa_batch_sync(task, |
124 | + (const void**) &ptr, | 138 | + (const void **)&ptr, |
125 | + 1, page_size); | 139 | + 1, page_size); |
126 | + g_assert(!batch_task.results[0]); | 140 | + g_assert(!task->results[0]); |
127 | + | 141 | + |
128 | + dsa_cleanup(); | 142 | + buffer_zero_batch_task_destroy(task); |
143 | + | ||
144 | + qemu_dsa_cleanup(); | ||
129 | +} | 145 | +} |
130 | + | 146 | + |
131 | +static void test_single_nonzero_async(void) | 147 | +static void test_single_nonzero_async(void) |
132 | +{ | 148 | +{ |
133 | + test_single_nonzero(); | 149 | + test_single_nonzero(); |
134 | +} | 150 | +} |
135 | + | 151 | + |
136 | +// count == 0 should return quickly without calling into DSA. | 152 | +/* count == 0 should return quickly without calling into DSA. */ |
137 | +static void test_zero_count_async(void) | 153 | +static void test_zero_count_async(void) |
138 | +{ | 154 | +{ |
139 | + char buf[page_size]; | 155 | + char buf[page_size]; |
140 | + buffer_is_zero_dsa_batch_async(&batch_task, | 156 | + buffer_is_zero_dsa_batch_sync(task, |
141 | + (const void **) &buf, | 157 | + (const void **)&buf, |
142 | + 0, | 158 | + 0, |
143 | + page_size); | 159 | + page_size); |
144 | +} | 160 | +} |
145 | + | 161 | + |
146 | +static void test_null_task_async(void) | 162 | +static void test_null_task_async(void) |
147 | +{ | 163 | +{ |
148 | + if (g_test_subprocess()) { | 164 | + if (g_test_subprocess()) { |
149 | + g_assert(!dsa_init(path1)); | 165 | + g_assert(!qemu_dsa_init(path1, errp)); |
150 | + | 166 | + |
151 | + char buf[page_size * batch_size]; | 167 | + char buf[page_size * batch_size]; |
152 | + char *addrs[batch_size]; | 168 | + char *addrs[batch_size]; |
153 | + for (int i = 0; i < batch_size; i++) { | 169 | + for (int i = 0; i < batch_size; i++) { |
154 | + addrs[i] = buf + (page_size * i); | 170 | + addrs[i] = buf + (page_size * i); |
155 | + } | 171 | + } |
156 | + | 172 | + |
157 | + buffer_is_zero_dsa_batch_async(NULL, (const void**) addrs, batch_size, | 173 | + buffer_is_zero_dsa_batch_sync(NULL, (const void **)addrs, |
158 | + page_size); | 174 | + batch_size, |
175 | + page_size); | ||
159 | + } else { | 176 | + } else { |
160 | + g_test_trap_subprocess(NULL, 0, 0); | 177 | + g_test_trap_subprocess(NULL, 0, 0); |
161 | + g_test_trap_assert_failed(); | 178 | + g_test_trap_assert_failed(); |
162 | + } | 179 | + } |
163 | +} | 180 | +} |
164 | + | 181 | + |
165 | +static void test_oversized_batch(void) | 182 | +static void test_oversized_batch(void) |
166 | +{ | 183 | +{ |
167 | + g_assert(!dsa_init(path1)); | 184 | + g_assert(!qemu_dsa_init(path1, errp)); |
168 | + dsa_start(); | 185 | + qemu_dsa_start(); |
169 | + | 186 | + |
170 | + buffer_zero_batch_task_init(&batch_task, batch_size); | 187 | + task = buffer_zero_batch_task_init(batch_size); |
171 | + | 188 | + |
172 | + int oversized_batch_size = batch_size + 1; | ||
173 | + char buf[page_size * oversized_batch_size]; | 189 | + char buf[page_size * oversized_batch_size]; |
174 | + char *addrs[batch_size]; | 190 | + char *addrs[batch_size]; |
175 | + for (int i = 0; i < oversized_batch_size; i++) { | 191 | + for (int i = 0; i < oversized_batch_size; i++) { |
176 | + addrs[i] = buf + (page_size * i); | 192 | + addrs[i] = buf + (page_size * i); |
177 | + } | 193 | + } |
178 | + | 194 | + |
179 | + int ret = buffer_is_zero_dsa_batch_async(&batch_task, | 195 | + int ret = buffer_is_zero_dsa_batch_sync(task, |
180 | + (const void**) addrs, | 196 | + (const void **)addrs, |
181 | + oversized_batch_size, | 197 | + oversized_batch_size, |
182 | + page_size); | 198 | + page_size); |
183 | + g_assert(ret != 0); | 199 | + g_assert(ret != 0); |
184 | + | 200 | + |
185 | + dsa_cleanup(); | 201 | + buffer_zero_batch_task_destroy(task); |
202 | + | ||
203 | + qemu_dsa_cleanup(); | ||
186 | +} | 204 | +} |
187 | + | 205 | + |
188 | +static void test_oversized_batch_async(void) | 206 | +static void test_oversized_batch_async(void) |
189 | +{ | 207 | +{ |
190 | + test_oversized_batch(); | 208 | + test_oversized_batch(); |
191 | +} | 209 | +} |
192 | + | 210 | + |
193 | +static void test_zero_len_async(void) | 211 | +static void test_zero_len_async(void) |
194 | +{ | 212 | +{ |
195 | + if (g_test_subprocess()) { | 213 | + if (g_test_subprocess()) { |
196 | + g_assert(!dsa_init(path1)); | 214 | + g_assert(!qemu_dsa_init(path1, errp)); |
197 | + | 215 | + |
198 | + buffer_zero_batch_task_init(&batch_task, batch_size); | 216 | + task = buffer_zero_batch_task_init(batch_size); |
199 | + | 217 | + |
200 | + char buf[page_size]; | 218 | + char buf[page_size]; |
201 | + | 219 | + |
202 | + buffer_is_zero_dsa_batch_async(&batch_task, | 220 | + buffer_is_zero_dsa_batch_sync(task, |
203 | + (const void**) &buf, | 221 | + (const void **)&buf, |
204 | + 1, | 222 | + 1, |
205 | + 0); | 223 | + 0); |
224 | + | ||
225 | + buffer_zero_batch_task_destroy(task); | ||
206 | + } else { | 226 | + } else { |
207 | + g_test_trap_subprocess(NULL, 0, 0); | 227 | + g_test_trap_subprocess(NULL, 0, 0); |
208 | + g_test_trap_assert_failed(); | 228 | + g_test_trap_assert_failed(); |
209 | + } | 229 | + } |
210 | +} | 230 | +} |
211 | + | 231 | + |
212 | +static void test_null_buf_async(void) | 232 | +static void test_null_buf_async(void) |
213 | +{ | 233 | +{ |
214 | + if (g_test_subprocess()) { | 234 | + if (g_test_subprocess()) { |
215 | + g_assert(!dsa_init(path1)); | 235 | + g_assert(!qemu_dsa_init(path1, errp)); |
216 | + | 236 | + |
217 | + buffer_zero_batch_task_init(&batch_task, batch_size); | 237 | + task = buffer_zero_batch_task_init(batch_size); |
218 | + | 238 | + |
219 | + buffer_is_zero_dsa_batch_async(&batch_task, NULL, 1, page_size); | 239 | + buffer_is_zero_dsa_batch_sync(task, NULL, 1, page_size); |
240 | + | ||
241 | + buffer_zero_batch_task_destroy(task); | ||
220 | + } else { | 242 | + } else { |
221 | + g_test_trap_subprocess(NULL, 0, 0); | 243 | + g_test_trap_subprocess(NULL, 0, 0); |
222 | + g_test_trap_assert_failed(); | 244 | + g_test_trap_assert_failed(); |
223 | + } | 245 | + } |
224 | +} | 246 | +} |
225 | + | 247 | + |
226 | +static void test_batch(void) | 248 | +static void test_batch(void) |
227 | +{ | 249 | +{ |
228 | + g_assert(!dsa_init(path1)); | 250 | + g_assert(!qemu_dsa_init(path1, errp)); |
229 | + dsa_start(); | 251 | + qemu_dsa_start(); |
230 | + | 252 | + |
231 | + buffer_zero_batch_task_init(&batch_task, batch_size); | 253 | + task = buffer_zero_batch_task_init(batch_size); |
232 | + | 254 | + |
233 | + char buf[page_size * batch_size]; | 255 | + char buf[page_size * batch_size]; |
234 | + char *addrs[batch_size]; | 256 | + char *addrs[batch_size]; |
235 | + for (int i = 0; i < batch_size; i++) { | 257 | + for (int i = 0; i < batch_size; i++) { |
236 | + addrs[i] = buf + (page_size * i); | 258 | + addrs[i] = buf + (page_size * i); |
237 | + } | 259 | + } |
238 | + | 260 | + |
239 | + // Using whatever is on the stack is somewhat random. | 261 | + /* |
240 | + // Manually set some pages to zero and some to nonzero. | 262 | + * Using whatever is on the stack is somewhat random. |
263 | + * Manually set some pages to zero and some to nonzero. | ||
264 | + */ | ||
241 | + memset(buf + 0, 0, page_size * 10); | 265 | + memset(buf + 0, 0, page_size * 10); |
242 | + memset(buf + (10 * page_size), 0xff, page_size * 10); | 266 | + memset(buf + (10 * page_size), 0xff, page_size * 10); |
243 | + | 267 | + |
244 | + buffer_is_zero_dsa_batch_async(&batch_task, | 268 | + buffer_is_zero_dsa_batch_sync(task, |
245 | + (const void**) addrs, | 269 | + (const void **)addrs, |
246 | + batch_size, | 270 | + batch_size, |
247 | + page_size); | 271 | + page_size); |
248 | + | 272 | + |
249 | + bool is_zero; | 273 | + bool is_zero; |
250 | + for (int i = 0; i < batch_size; i++) { | 274 | + for (int i = 0; i < batch_size; i++) { |
251 | + is_zero = buffer_is_zero((const void*) &buf[page_size * i], page_size); | 275 | + is_zero = buffer_is_zero((const void *)&buf[page_size * i], page_size); |
252 | + g_assert(batch_task.results[i] == is_zero); | 276 | + g_assert(task->results[i] == is_zero); |
253 | + } | 277 | + } |
254 | + dsa_cleanup(); | 278 | + |
279 | + buffer_zero_batch_task_destroy(task); | ||
280 | + | ||
281 | + qemu_dsa_cleanup(); | ||
255 | +} | 282 | +} |
256 | + | 283 | + |
257 | +static void test_batch_async(void) | 284 | +static void test_batch_async(void) |
258 | +{ | 285 | +{ |
259 | + test_batch(); | 286 | + test_batch(); |
260 | +} | 287 | +} |
261 | + | 288 | + |
262 | +static void test_page_fault(void) | 289 | +static void test_page_fault(void) |
263 | +{ | 290 | +{ |
264 | + g_assert(!dsa_init(path1)); | 291 | + g_assert(!qemu_dsa_init(path1, errp)); |
265 | + dsa_start(); | 292 | + qemu_dsa_start(); |
266 | + | 293 | + |
267 | + char* buf[2]; | 294 | + char *buf[2]; |
268 | + int prot = PROT_READ | PROT_WRITE; | 295 | + int prot = PROT_READ | PROT_WRITE; |
269 | + int flags = MAP_SHARED | MAP_ANON; | 296 | + int flags = MAP_SHARED | MAP_ANON; |
270 | + buf[0] = (char*) mmap(NULL, page_size * batch_size, prot, flags, -1, 0); | 297 | + buf[0] = (char *)mmap(NULL, page_size * batch_size, prot, flags, -1, 0); |
271 | + assert(buf[0] != MAP_FAILED); | 298 | + assert(buf[0] != MAP_FAILED); |
272 | + buf[1] = (char*) malloc(page_size * batch_size); | 299 | + buf[1] = (char *)malloc(page_size * batch_size); |
273 | + assert(buf[1] != NULL); | 300 | + assert(buf[1] != NULL); |
274 | + | 301 | + |
275 | + for (int j = 0; j < 2; j++) { | 302 | + for (int j = 0; j < 2; j++) { |
276 | + buffer_zero_batch_task_init(&batch_task, batch_size); | 303 | + task = buffer_zero_batch_task_init(batch_size); |
277 | + | 304 | + |
278 | + char *addrs[batch_size]; | 305 | + char *addrs[batch_size]; |
279 | + for (int i = 0; i < batch_size; i++) { | 306 | + for (int i = 0; i < batch_size; i++) { |
280 | + addrs[i] = buf[j] + (page_size * i); | 307 | + addrs[i] = buf[j] + (page_size * i); |
281 | + } | 308 | + } |
282 | + | 309 | + |
283 | + buffer_is_zero_dsa_batch_async(&batch_task, | 310 | + buffer_is_zero_dsa_batch_sync(task, |
284 | + (const void**) addrs, | 311 | + (const void **)addrs, |
285 | + batch_size, | 312 | + batch_size, |
286 | + page_size); | 313 | + page_size); |
287 | + | 314 | + |
288 | + bool is_zero; | 315 | + bool is_zero; |
289 | + for (int i = 0; i < batch_size; i++) { | 316 | + for (int i = 0; i < batch_size; i++) { |
290 | + is_zero = buffer_is_zero((const void*) &buf[j][page_size * i], page_size); | 317 | + is_zero = buffer_is_zero((const void *)&buf[j][page_size * i], |
291 | + g_assert(batch_task.results[i] == is_zero); | 318 | + page_size); |
292 | + } | 319 | + g_assert(task->results[i] == is_zero); |
320 | + } | ||
321 | + buffer_zero_batch_task_destroy(task); | ||
293 | + } | 322 | + } |
294 | + | 323 | + |
295 | + assert(!munmap(buf[0], page_size * batch_size)); | 324 | + assert(!munmap(buf[0], page_size * batch_size)); |
296 | + free(buf[1]); | 325 | + free(buf[1]); |
297 | + dsa_cleanup(); | 326 | + qemu_dsa_cleanup(); |
298 | +} | 327 | +} |
299 | + | 328 | + |
300 | +static void test_various_buffer_sizes(void) | 329 | +static void test_various_buffer_sizes(void) |
301 | +{ | 330 | +{ |
302 | + g_assert(!dsa_init(path1)); | 331 | + g_assert(!qemu_dsa_init(path1, errp)); |
303 | + dsa_start(); | 332 | + qemu_dsa_start(); |
304 | + | 333 | + |
305 | + int len = 1 << 4; | 334 | + char *buf = malloc(max_buffer_size * batch_size); |
306 | + for (int count = 12; count > 0; count--, len <<= 1) { | 335 | + char *addrs[batch_size]; |
307 | + buffer_zero_batch_task_init(&batch_task, batch_size); | 336 | + |
308 | + | 337 | + for (int len = 16; len <= max_buffer_size; len *= 2) { |
309 | + char buf[len * batch_size]; | 338 | + task = buffer_zero_batch_task_init(batch_size); |
310 | + char *addrs[batch_size]; | 339 | + |
311 | + for (int i = 0; i < batch_size; i++) { | 340 | + for (int i = 0; i < batch_size; i++) { |
312 | + addrs[i] = buf + (len * i); | 341 | + addrs[i] = buf + (len * i); |
313 | + } | 342 | + } |
314 | + | 343 | + |
315 | + buffer_is_zero_dsa_batch_async(&batch_task, | 344 | + buffer_is_zero_dsa_batch_sync(task, |
316 | + (const void**) addrs, | 345 | + (const void **)addrs, |
317 | + batch_size, | 346 | + batch_size, |
318 | + len); | 347 | + len); |
319 | + | 348 | + |
320 | + bool is_zero; | 349 | + bool is_zero; |
321 | + for (int j = 0; j < batch_size; j++) { | 350 | + for (int j = 0; j < batch_size; j++) { |
322 | + is_zero = buffer_is_zero((const void*) &buf[len * j], len); | 351 | + is_zero = buffer_is_zero((const void *)&buf[len * j], len); |
323 | + g_assert(batch_task.results[j] == is_zero); | 352 | + g_assert(task->results[j] == is_zero); |
324 | + } | 353 | + } |
325 | + } | 354 | + |
326 | + | 355 | + buffer_zero_batch_task_destroy(task); |
327 | + dsa_cleanup(); | 356 | + } |
357 | + | ||
358 | + free(buf); | ||
359 | + | ||
360 | + qemu_dsa_cleanup(); | ||
328 | +} | 361 | +} |
329 | + | 362 | + |
330 | +static void test_various_buffer_sizes_async(void) | 363 | +static void test_various_buffer_sizes_async(void) |
331 | +{ | 364 | +{ |
332 | + test_various_buffer_sizes(); | 365 | + test_various_buffer_sizes(); |
333 | +} | 366 | +} |
334 | + | 367 | + |
335 | +static void test_double_start_stop(void) | 368 | +static void test_double_start_stop(void) |
336 | +{ | 369 | +{ |
337 | + g_assert(!dsa_init(path1)); | 370 | + g_assert(!qemu_dsa_init(path1, errp)); |
338 | + // Double start | 371 | + /* Double start */ |
339 | + dsa_start(); | 372 | + qemu_dsa_start(); |
340 | + dsa_start(); | 373 | + qemu_dsa_start(); |
341 | + g_assert(dsa_is_running()); | 374 | + g_assert(qemu_dsa_is_running()); |
342 | + do_single_task(); | 375 | + do_single_task(); |
343 | + | 376 | + |
344 | + // Double stop | 377 | + /* Double stop */ |
345 | + dsa_stop(); | 378 | + qemu_dsa_stop(); |
346 | + g_assert(!dsa_is_running()); | 379 | + g_assert(!qemu_dsa_is_running()); |
347 | + dsa_stop(); | 380 | + qemu_dsa_stop(); |
348 | + g_assert(!dsa_is_running()); | 381 | + g_assert(!qemu_dsa_is_running()); |
349 | + | 382 | + |
350 | + // Restart | 383 | + /* Restart */ |
351 | + dsa_start(); | 384 | + qemu_dsa_start(); |
352 | + g_assert(dsa_is_running()); | 385 | + g_assert(qemu_dsa_is_running()); |
353 | + do_single_task(); | 386 | + do_single_task(); |
354 | + dsa_cleanup(); | 387 | + qemu_dsa_cleanup(); |
355 | +} | 388 | +} |
356 | + | 389 | + |
357 | +static void test_is_running(void) | 390 | +static void test_is_running(void) |
358 | +{ | 391 | +{ |
359 | + g_assert(!dsa_init(path1)); | 392 | + g_assert(!qemu_dsa_init(path1, errp)); |
360 | + | 393 | + |
361 | + g_assert(!dsa_is_running()); | 394 | + g_assert(!qemu_dsa_is_running()); |
362 | + dsa_start(); | 395 | + qemu_dsa_start(); |
363 | + g_assert(dsa_is_running()); | 396 | + g_assert(qemu_dsa_is_running()); |
364 | + dsa_stop(); | 397 | + qemu_dsa_stop(); |
365 | + g_assert(!dsa_is_running()); | 398 | + g_assert(!qemu_dsa_is_running()); |
366 | + dsa_cleanup(); | 399 | + qemu_dsa_cleanup(); |
367 | +} | 400 | +} |
368 | + | 401 | + |
369 | +static void test_multiple_engines(void) | 402 | +static void test_multiple_engines(void) |
370 | +{ | 403 | +{ |
371 | + g_assert(!dsa_init(path2)); | 404 | + g_assert(!qemu_dsa_init(path2, errp)); |
372 | + dsa_start(); | 405 | + qemu_dsa_start(); |
373 | + | 406 | + |
374 | + struct buffer_zero_batch_task tasks[num_devices] | 407 | + QemuDsaBatchTask *tasks[num_devices]; |
375 | + __attribute__((aligned(64))); | ||
376 | + char bufs[num_devices][page_size * batch_size]; | 408 | + char bufs[num_devices][page_size * batch_size]; |
377 | + char *addrs[num_devices][batch_size]; | 409 | + char *addrs[num_devices][batch_size]; |
378 | + | 410 | + |
379 | + // This is a somewhat implementation-specific way of testing that the tasks | 411 | + /* |
380 | + // have unique engines assigned to them. | 412 | + * This is a somewhat implementation-specific way |
381 | + buffer_zero_batch_task_init(&tasks[0], batch_size); | 413 | + * of testing that the tasks have unique engines |
382 | + buffer_zero_batch_task_init(&tasks[1], batch_size); | 414 | + * assigned to them. |
383 | + g_assert(tasks[0].device != tasks[1].device); | 415 | + */ |
416 | + tasks[0] = buffer_zero_batch_task_init(batch_size); | ||
417 | + tasks[1] = buffer_zero_batch_task_init(batch_size); | ||
418 | + g_assert(tasks[0]->device != tasks[1]->device); | ||
384 | + | 419 | + |
385 | + for (int i = 0; i < num_devices; i++) { | 420 | + for (int i = 0; i < num_devices; i++) { |
386 | + for (int j = 0; j < batch_size; j++) { | 421 | + for (int j = 0; j < batch_size; j++) { |
387 | + addrs[i][j] = bufs[i] + (page_size * j); | 422 | + addrs[i][j] = bufs[i] + (page_size * j); |
388 | + } | 423 | + } |
389 | + | 424 | + |
390 | + buffer_is_zero_dsa_batch_async(&tasks[i], | 425 | + buffer_is_zero_dsa_batch_sync(tasks[i], |
391 | + (const void**) addrs[i], | 426 | + (const void **)addrs[i], |
392 | + batch_size, page_size); | 427 | + batch_size, page_size); |
393 | + | 428 | + |
394 | + bool is_zero; | 429 | + bool is_zero; |
395 | + for (int j = 0; j < batch_size; j++) { | 430 | + for (int j = 0; j < batch_size; j++) { |
396 | + is_zero = buffer_is_zero((const void*) &bufs[i][page_size * j], | 431 | + is_zero = buffer_is_zero((const void *)&bufs[i][page_size * j], |
397 | + page_size); | 432 | + page_size); |
398 | + g_assert(tasks[i].results[j] == is_zero); | 433 | + g_assert(tasks[i]->results[j] == is_zero); |
399 | + } | 434 | + } |
400 | + } | 435 | + } |
401 | + | 436 | + |
402 | + dsa_cleanup(); | 437 | + buffer_zero_batch_task_destroy(tasks[0]); |
438 | + buffer_zero_batch_task_destroy(tasks[1]); | ||
439 | + | ||
440 | + qemu_dsa_cleanup(); | ||
403 | +} | 441 | +} |
404 | + | 442 | + |
405 | +static void test_configure_dsa_twice(void) | 443 | +static void test_configure_dsa_twice(void) |
406 | +{ | 444 | +{ |
407 | + g_assert(!dsa_init(path2)); | 445 | + g_assert(!qemu_dsa_init(path2, errp)); |
408 | + g_assert(!dsa_init(path2)); | 446 | + g_assert(!qemu_dsa_init(path2, errp)); |
409 | + dsa_start(); | 447 | + qemu_dsa_start(); |
410 | + do_single_task(); | 448 | + do_single_task(); |
411 | + dsa_cleanup(); | 449 | + qemu_dsa_cleanup(); |
412 | +} | 450 | +} |
413 | + | 451 | + |
414 | +static void test_configure_dsa_bad_path(void) | 452 | +static void test_configure_dsa_bad_path(void) |
415 | +{ | 453 | +{ |
416 | + const char* bad_path = "/not/a/real/path"; | 454 | + const strList *bad_path = &(strList) { |
417 | + g_assert(dsa_init(bad_path)); | 455 | + .value = (char *)"/not/a/real/path", .next = NULL |
456 | + }; | ||
457 | + g_assert(qemu_dsa_init(bad_path, errp)); | ||
418 | +} | 458 | +} |
419 | + | 459 | + |
420 | +static void test_cleanup_before_configure(void) | 460 | +static void test_cleanup_before_configure(void) |
421 | +{ | 461 | +{ |
422 | + dsa_cleanup(); | 462 | + qemu_dsa_cleanup(); |
423 | + g_assert(!dsa_init(path2)); | 463 | + g_assert(!qemu_dsa_init(path2, errp)); |
424 | +} | 464 | +} |
425 | + | 465 | + |
426 | +static void test_configure_dsa_num_devices(void) | 466 | +static void test_configure_dsa_num_devices(void) |
427 | +{ | 467 | +{ |
428 | + g_assert(!dsa_init(path1)); | 468 | + g_assert(!qemu_dsa_init(path1, errp)); |
429 | + dsa_start(); | 469 | + qemu_dsa_start(); |
430 | + | 470 | + |
431 | + do_single_task(); | 471 | + do_single_task(); |
432 | + dsa_stop(); | 472 | + qemu_dsa_stop(); |
433 | + dsa_cleanup(); | 473 | + qemu_dsa_cleanup(); |
434 | +} | 474 | +} |
435 | + | 475 | + |
436 | +static void test_cleanup_twice(void) | 476 | +static void test_cleanup_twice(void) |
437 | +{ | 477 | +{ |
438 | + g_assert(!dsa_init(path2)); | 478 | + g_assert(!qemu_dsa_init(path2, errp)); |
439 | + dsa_cleanup(); | 479 | + qemu_dsa_cleanup(); |
440 | + dsa_cleanup(); | 480 | + qemu_dsa_cleanup(); |
441 | + | 481 | + |
442 | + g_assert(!dsa_init(path2)); | 482 | + g_assert(!qemu_dsa_init(path2, errp)); |
443 | + dsa_start(); | 483 | + qemu_dsa_start(); |
444 | + do_single_task(); | 484 | + do_single_task(); |
445 | + dsa_cleanup(); | 485 | + qemu_dsa_cleanup(); |
446 | +} | 486 | +} |
447 | + | 487 | + |
448 | +static int check_test_setup(void) | 488 | +static int check_test_setup(void) |
449 | +{ | 489 | +{ |
450 | + const char *path[2] = {path1, path2}; | 490 | + const strList *path[2] = {path1, path2}; |
451 | + for (int i = 0; i < sizeof(path) / sizeof(char *); i++) { | 491 | + for (int i = 0; i < sizeof(path) / sizeof(strList *); i++) { |
452 | + if (!dsa_init(path[i])) { | 492 | + if (qemu_dsa_init(path[i], errp)) { |
453 | + return -1; | 493 | + return -1; |
454 | + } | 494 | + } |
455 | + dsa_cleanup(); | 495 | + qemu_dsa_cleanup(); |
456 | + } | 496 | + } |
457 | + return 0; | 497 | + return 0; |
458 | +} | 498 | +} |
459 | + | 499 | + |
460 | +int main(int argc, char **argv) | 500 | +int main(int argc, char **argv) |
... | ... | ||
465 | + /* | 505 | + /* |
466 | + * This test requires extra setup. The current | 506 | + * This test requires extra setup. The current |
467 | + * setup is not correct. Just skip this test | 507 | + * setup is not correct. Just skip this test |
468 | + * for now. | 508 | + * for now. |
469 | + */ | 509 | + */ |
470 | + exit(0); | 510 | + g_test_skip("DSA hardware is not configured properly."); |
511 | + return g_test_run(); | ||
471 | + } | 512 | + } |
472 | + | 513 | + |
473 | + if (num_devices > 1) { | 514 | + if (num_devices > 1) { |
474 | + g_test_add_func("/dsa/multiple_engines", test_multiple_engines); | 515 | + g_test_add_func("/dsa/multiple_engines", test_multiple_engines); |
475 | + } | 516 | + } |
... | ... | ||
498 | + g_test_add_func("/dsa/cleanup_twice", test_cleanup_twice); | 539 | + g_test_add_func("/dsa/cleanup_twice", test_cleanup_twice); |
499 | + | 540 | + |
500 | + return g_test_run(); | 541 | + return g_test_run(); |
501 | +} | 542 | +} |
502 | -- | 543 | -- |
503 | 2.30.2 | 544 | Yichen Wang | diff view generated by jsdifflib |
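
Taking the updated (right-hand) test code above as the reference, here is a condensed sketch of the batch zero-page-check lifecycle those cases exercise. The function names are taken straight from the diff (qemu_dsa_init/start/stop/cleanup, buffer_zero_batch_task_init/destroy, buffer_is_zero_dsa_batch_sync); the header paths, the work-queue path and the error handling are illustrative assumptions, not something defined by this series:

    /* Sketch only: mirrors the call sequence used by the unit tests above. */
    #include "qemu/osdep.h"
    #include "qemu/dsa.h"           /* assumed header for the DSA helpers */
    #include "qapi/error.h"

    static bool pages_all_zero_via_dsa(char **pages, int count, size_t page_size)
    {
        /* One shared work queue; the path is a placeholder. */
        strList wq = { .value = (char *)"/dev/dsa/wq4.0", .next = NULL };
        Error *local_err = NULL;
        QemuDsaBatchTask *task;
        bool all_zero = true;

        if (qemu_dsa_init(&wq, &local_err)) {   /* non-zero means failure */
            error_free(local_err);              /* a real caller would report it */
            return false;
        }
        qemu_dsa_start();

        task = buffer_zero_batch_task_init(count);
        buffer_is_zero_dsa_batch_sync(task, (const void **)pages, count, page_size);
        for (int i = 0; i < count; i++) {
            all_zero &= task->results[i];       /* per-page zero/non-zero verdict */
        }
        buffer_zero_batch_task_destroy(task);

        qemu_dsa_stop();
        qemu_dsa_cleanup();
        return all_zero;
    }
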
1 | From: Hao Xiang <hao.xiang@linux.dev> | ||
---|---|---|---|
2 | |||
1 | * Add test case to start and complete multifd live migration with DSA | 3 | * Add test case to start and complete multifd live migration with DSA |
2 | offloading enabled. | 4 | offloading enabled. |
3 | * Add test case to start and cancel multifd live migration with DSA | 5 | * Add test case to start and cancel multifd live migration with DSA |
4 | offloading enabled. | 6 | offloading enabled. |
5 | 7 | ||
6 | Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com> | 8 | Signed-off-by: Bryan Zhang <bryan.zhang@bytedance.com> |
7 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | 9 | Signed-off-by: Hao Xiang <hao.xiang@linux.dev> |
10 | Signed-off-by: Yichen Wang <yichen.wang@bytedance.com> | ||
11 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
8 | --- | 12 | --- |
9 | tests/qtest/migration-test.c | 77 +++++++++++++++++++++++++++++++++++- | 13 | tests/qtest/meson.build | 10 +++++- |
10 | 1 file changed, 76 insertions(+), 1 deletion(-) | 14 | tests/qtest/migration-test.c | 3 ++ |
15 | tests/qtest/migration/dsa-tests.c | 59 +++++++++++++++++++++++++++++++ | ||
16 | tests/qtest/migration/framework.h | 1 + | ||
17 | 4 files changed, 72 insertions(+), 1 deletion(-) | ||
18 | create mode 100644 tests/qtest/migration/dsa-tests.c | ||
11 | 19 | ||
20 | diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/tests/qtest/meson.build | ||
23 | +++ b/tests/qtest/meson.build | ||
24 | @@ -XXX,XX +XXX,XX @@ if gnutls.found() | ||
25 | endif | ||
26 | endif | ||
27 | |||
28 | +migration_dsa_files = [] | ||
29 | +if config_host_data.get('CONFIG_DSA_OPT') | ||
30 | + migration_dsa_files = [files( | ||
31 | + 'migration/dsa-tests.c', | ||
32 | + )] | ||
33 | +endif | ||
34 | + | ||
35 | qtests = { | ||
36 | 'bios-tables-test': [io, 'boot-sector.c', 'acpi-utils.c', 'tpm-emu.c'], | ||
37 | 'cdrom-test': files('boot-sector.c'), | ||
38 | @@ -XXX,XX +XXX,XX @@ qtests = { | ||
39 | 'migration/migration-util.c') + dbus_vmstate1, | ||
40 | 'erst-test': files('erst-test.c'), | ||
41 | 'ivshmem-test': [rt, '../../contrib/ivshmem-server/ivshmem-server.c'], | ||
42 | - 'migration-test': migration_files + migration_tls_files, | ||
43 | + 'migration-test': migration_files + migration_tls_files + \ | ||
44 | + migration_dsa_files, | ||
45 | 'pxe-test': files('boot-sector.c'), | ||
46 | 'pnv-xive2-test': files('pnv-xive2-common.c', 'pnv-xive2-flush-sync.c'), | ||
47 | 'qos-test': [chardev, io, qos_test_ss.apply({}).sources()], | ||
12 | diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c | 48 | diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c |
13 | index XXXXXXX..XXXXXXX 100644 | 49 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tests/qtest/migration-test.c | 50 | --- a/tests/qtest/migration-test.c |
15 | +++ b/tests/qtest/migration-test.c | 51 | +++ b/tests/qtest/migration-test.c |
16 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 52 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
17 | const char *opts_target; | 53 | migration_test_add_precopy(env); |
18 | } MigrateStart; | 54 | migration_test_add_cpr(env); |
19 | 55 | migration_test_add_misc(env); | |
56 | +#ifdef CONFIG_DSA_OPT | ||
57 | + migration_test_add_dsa(env); | ||
58 | +#endif | ||
59 | |||
60 | ret = g_test_run(); | ||
61 | |||
62 | diff --git a/tests/qtest/migration/dsa-tests.c b/tests/qtest/migration/dsa-tests.c | ||
63 | new file mode 100644 | ||
64 | index XXXXXXX..XXXXXXX | ||
65 | --- /dev/null | ||
66 | +++ b/tests/qtest/migration/dsa-tests.c | ||
67 | @@ -XXX,XX +XXX,XX @@ | ||
68 | +/* | ||
69 | + * QTest testcases for DSA accelerator | ||
70 | + * | ||
71 | + * Copyright (C) Bytedance Ltd. | ||
72 | + * based on the vhost-user-test.c that is: | ||
73 | + * Copyright (c) 2014 Virtual Open Systems Sarl. | ||
74 | + * | ||
75 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
76 | + * See the COPYING file in the top-level directory. | ||
77 | + */ | ||
78 | + | ||
79 | +#include "qemu/osdep.h" | ||
80 | +#include "libqtest.h" | ||
81 | +#include "migration/framework.h" | ||
82 | +#include "migration/migration-qmp.h" | ||
83 | +#include "migration/migration-util.h" | ||
84 | + | ||
20 | +/* | 85 | +/* |
21 | + * It requires separate steps to configure and enable DSA device. | 86 | + * It requires separate steps to configure and enable DSA device. |
22 | + * This test assumes that the configuration is done already. | 87 | + * This test assumes that the configuration is done already. |
23 | + */ | 88 | + */ |
24 | +static const char* dsa_dev_path = "/dev/dsa/wq4.0"; | 89 | +static const char *dsa_dev_path_p = "['dsa:/dev/dsa/wq4.0']"; |
25 | + | 90 | +static const char *dsa_dev_path = "/dev/dsa/wq4.0"; |
26 | /* | ||
27 | * A hook that runs after the src and dst QEMUs have been | ||
28 | * created, but before the migration is started. This can | ||
29 | @@ -XXX,XX +XXX,XX @@ static void test_multifd_tcp_tls_x509_reject_anon_client(void) | ||
30 | * | ||
31 | * And see that it works | ||
32 | */ | ||
33 | -static void test_multifd_tcp_cancel(void) | ||
34 | +static void test_multifd_tcp_cancel_common(bool use_dsa) | ||
35 | { | ||
36 | MigrateStart args = { | ||
37 | .hide_stderr = true, | ||
38 | @@ -XXX,XX +XXX,XX @@ static void test_multifd_tcp_cancel(void) | ||
39 | migrate_set_capability(from, "multifd", true); | ||
40 | migrate_set_capability(to, "multifd", true); | ||
41 | |||
42 | + if (use_dsa) { | ||
43 | + migrate_set_parameter_str(from, "multifd-dsa-accel", dsa_dev_path); | ||
44 | + } | ||
45 | + | ||
46 | /* Start incoming migration from the 1st socket */ | ||
47 | migrate_incoming_qmp(to, "tcp:127.0.0.1:0", "{}"); | ||
48 | |||
49 | @@ -XXX,XX +XXX,XX @@ static void test_multifd_tcp_cancel(void) | ||
50 | test_migrate_end(from, to2, true); | ||
51 | } | ||
52 | |||
53 | +/* | ||
54 | + * This test does: | ||
55 | + * source target | ||
56 | + * migrate_incoming | ||
57 | + * migrate | ||
58 | + * migrate_cancel | ||
59 | + * launch another target | ||
60 | + * migrate | ||
61 | + * | ||
62 | + * And see that it works | ||
63 | + */ | ||
64 | +static void test_multifd_tcp_cancel(void) | ||
65 | +{ | ||
66 | + test_multifd_tcp_cancel_common(false); | ||
67 | +} | ||
68 | + | ||
69 | +#ifdef CONFIG_DSA_OPT | ||
70 | + | ||
71 | +static void *test_migrate_precopy_tcp_multifd_start_dsa(QTestState *from, | ||
72 | + QTestState *to) | ||
73 | +{ | ||
74 | + migrate_set_parameter_str(from, "multifd-dsa-accel", dsa_dev_path); | ||
75 | + return test_migrate_precopy_tcp_multifd_start_common(from, to, "none"); | ||
76 | +} | ||
77 | + | ||
78 | +static void test_multifd_tcp_none_dsa(void) | ||
79 | +{ | ||
80 | + MigrateCommon args = { | ||
81 | + .listen_uri = "defer", | ||
82 | + .start_hook = test_migrate_precopy_tcp_multifd_start_dsa, | ||
83 | + }; | ||
84 | + | ||
85 | + test_precopy_common(&args); | ||
86 | +} | ||
87 | + | ||
88 | +static void test_multifd_tcp_cancel_dsa(void) | ||
89 | +{ | ||
90 | + test_multifd_tcp_cancel_common(true); | ||
91 | +} | ||
92 | + | ||
93 | +#endif | ||
94 | + | ||
95 | static void calc_dirty_rate(QTestState *who, uint64_t calc_time) | ||
96 | { | ||
97 | qtest_qmp_assert_success(who, | ||
98 | @@ -XXX,XX +XXX,XX @@ static bool kvm_dirty_ring_supported(void) | ||
99 | #endif | ||
100 | } | ||
101 | |||
102 | +#ifdef CONFIG_DSA_OPT | ||
103 | +static int test_dsa_setup(void) | 91 | +static int test_dsa_setup(void) |
104 | +{ | 92 | +{ |
105 | + int fd; | 93 | + int fd; |
106 | + fd = open(dsa_dev_path, O_RDWR); | 94 | + fd = open(dsa_dev_path, O_RDWR); |
107 | + if (fd < 0) { | 95 | + if (fd < 0) { |
108 | + return -1; | 96 | + return -1; |
109 | + } | 97 | + } |
110 | + close(fd); | 98 | + close(fd); |
111 | + return 0; | 99 | + return 0; |
112 | +} | 100 | +} |
113 | +#endif | ||
114 | + | 101 | + |
115 | int main(int argc, char **argv) | 102 | +static void *test_migrate_precopy_tcp_multifd_start_dsa(QTestState *from, |
116 | { | 103 | + QTestState *to) |
117 | bool has_kvm, has_tcg; | 104 | +{ |
118 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | 105 | + migrate_set_parameter_str(from, "zero-page-detection", "dsa-accel"); |
119 | } | 106 | + migrate_set_parameter_str(from, "accel-path", dsa_dev_path_p); |
120 | qtest_add_func("/migration/multifd/tcp/plain/none", | 107 | + return migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); |
121 | test_multifd_tcp_none); | 108 | +} |
122 | + | 109 | + |
123 | +#ifdef CONFIG_DSA_OPT | 110 | +static void test_multifd_tcp_zero_page_dsa(void) |
124 | + if (g_str_equal(arch, "x86_64") && test_dsa_setup() == 0) { | 111 | +{ |
125 | + qtest_add_func("/migration/multifd/tcp/plain/none/dsa", | 112 | + MigrateCommon args = { |
126 | + test_multifd_tcp_none_dsa); | 113 | + .listen_uri = "defer", |
127 | + qtest_add_func("/migration/multifd/tcp/plain/cancel/dsa", | 114 | + .start_hook = test_migrate_precopy_tcp_multifd_start_dsa, |
128 | + test_multifd_tcp_cancel_dsa); | 115 | + }; |
116 | + | ||
117 | + test_precopy_common(&args); | ||
118 | +} | ||
119 | + | ||
120 | +void migration_test_add_dsa(MigrationTestEnv *env) | ||
121 | +{ | ||
122 | + if (test_dsa_setup() == 0) { | ||
123 | + migration_test_add("/migration/multifd/tcp/plain/zero-page/dsa", | ||
124 | + test_multifd_tcp_zero_page_dsa); | ||
129 | + } | 125 | + } |
130 | +#endif | 126 | +} |
131 | + | 127 | diff --git a/tests/qtest/migration/framework.h b/tests/qtest/migration/framework.h |
132 | /* | 128 | index XXXXXXX..XXXXXXX 100644 |
133 | * This test is flaky and sometimes fails in CI and otherwise: | 129 | --- a/tests/qtest/migration/framework.h |
134 | * don't run unless user opts in via environment variable. | 130 | +++ b/tests/qtest/migration/framework.h |
131 | @@ -XXX,XX +XXX,XX @@ void migration_test_add_file(MigrationTestEnv *env); | ||
132 | void migration_test_add_precopy(MigrationTestEnv *env); | ||
133 | void migration_test_add_cpr(MigrationTestEnv *env); | ||
134 | void migration_test_add_misc(MigrationTestEnv *env); | ||
135 | +void migration_test_add_dsa(MigrationTestEnv *env); | ||
136 | |||
137 | #endif /* TEST_FRAMEWORK_H */ | ||
135 | -- | 138 | -- |
136 | 2.30.2 | 139 | Yichen Wang | diff view generated by jsdifflib |
1 | Idxd is the device driver for DSA (Intel Data Streaming | 1 | From: Yuan Liu <yuan1.liu@intel.com> |
---|---|---|---|
2 | Accelerator). The driver is fully functioning since Linux | ||
3 | kernel 5.19. This change adds the driver's header file used | ||
4 | for userspace development. | ||
5 | 2 | ||
6 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | 3 | Signed-off-by: Yuan Liu <yuan1.liu@intel.com> |
4 | Signed-off-by: Yichen Wang <yichen.wang@bytedance.com> | ||
5 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
7 | --- | 6 | --- |
8 | linux-headers/linux/idxd.h | 356 +++++++++++++++++++++++++++++++++++++ | 7 | .../migration/dsa-zero-page-detection.rst | 290 ++++++++++++++++++ |
9 | 1 file changed, 356 insertions(+) | 8 | docs/devel/migration/features.rst | 1 + |
10 | create mode 100644 linux-headers/linux/idxd.h | 9 | 2 files changed, 291 insertions(+) |
10 | create mode 100644 docs/devel/migration/dsa-zero-page-detection.rst | ||
11 | 11 | ||
12 | diff --git a/linux-headers/linux/idxd.h b/linux-headers/linux/idxd.h | 12 | diff --git a/docs/devel/migration/dsa-zero-page-detection.rst b/docs/devel/migration/dsa-zero-page-detection.rst |
13 | new file mode 100644 | 13 | new file mode 100644 |
14 | index XXXXXXX..XXXXXXX | 14 | index XXXXXXX..XXXXXXX |
15 | --- /dev/null | 15 | --- /dev/null |
16 | +++ b/linux-headers/linux/idxd.h | 16 | +++ b/docs/devel/migration/dsa-zero-page-detection.rst |
17 | @@ -XXX,XX +XXX,XX @@ | 17 | @@ -XXX,XX +XXX,XX @@ |
18 | +/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */ | 18 | +============================= |
19 | +/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ | 19 | +DSA-Based Zero Page Detection |
20 | +#ifndef _USR_IDXD_H_ | 20 | +============================= |
21 | +#define _USR_IDXD_H_ | 21 | +Intel Data Streaming Accelerator(``DSA``) is introduced in Intel's 4th |
22 | + | 22 | +generation Xeon server, aka Sapphire Rapids(``SPR``). One of the things |
23 | +#ifdef __KERNEL__ | 23 | +DSA can do is to offload memory comparison workload from CPU to DSA accelerator |
24 | +#include <linux/types.h> | 24 | +hardware. |
25 | +#else | 25 | + |
26 | +#include <stdint.h> | 26 | +The main advantages of using DSA to accelerate zero pages detection include |
27 | +#endif | 27 | + |
28 | + | 28 | +1. Reduces CPU usage in multifd live migration workflow across all use cases. |
29 | +/* Driver command error status */ | 29 | + |
30 | +enum idxd_scmd_stat { | 30 | +2. Reduces migration total time in some use cases. |
31 | + IDXD_SCMD_DEV_ENABLED = 0x80000010, | 31 | + |
32 | + IDXD_SCMD_DEV_NOT_ENABLED = 0x80000020, | 32 | + |
33 | + IDXD_SCMD_WQ_ENABLED = 0x80000021, | 33 | +DSA-Based Zero Page Detection Introduction |
34 | + IDXD_SCMD_DEV_DMA_ERR = 0x80020000, | 34 | +========================================== |
35 | + IDXD_SCMD_WQ_NO_GRP = 0x80030000, | 35 | + |
36 | + IDXD_SCMD_WQ_NO_NAME = 0x80040000, | 36 | +:: |
37 | + IDXD_SCMD_WQ_NO_SVM = 0x80050000, | 37 | + |
38 | + IDXD_SCMD_WQ_NO_THRESH = 0x80060000, | 38 | + |
39 | + IDXD_SCMD_WQ_PORTAL_ERR = 0x80070000, | 39 | + +----------------+ +------------------+ |
40 | + IDXD_SCMD_WQ_RES_ALLOC_ERR = 0x80080000, | 40 | + | MultiFD Thread | |accel-config tool | |
41 | + IDXD_SCMD_PERCPU_ERR = 0x80090000, | 41 | + +-+--------+-----+ +--------+---------+ |
42 | + IDXD_SCMD_DMA_CHAN_ERR = 0x800a0000, | 42 | + | | | |
43 | + IDXD_SCMD_CDEV_ERR = 0x800b0000, | 43 | + | | Open DSA | Setup DSA |
44 | + IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000, | 44 | + | | Work Queues | Resources |
45 | + IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000, | 45 | + | | +-----+-----+ | |
46 | + IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, | 46 | + | +------>|idxd driver|<-+ |
47 | + IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, | 47 | + | +-----+-----+ |
48 | + IDXD_SCMD_WQ_IRQ_ERR = 0x80100000, | 48 | + | | |
49 | + IDXD_SCMD_WQ_USER_NO_IOMMU = 0x80110000, | 49 | + | | |
50 | +}; | 50 | + | +-----+-----+ |
51 | + | 51 | + +----------------+DSA Devices| |
52 | +#define IDXD_SCMD_SOFTERR_MASK 0x80000000 | 52 | + Submit jobs +-----------+ |
53 | +#define IDXD_SCMD_SOFTERR_SHIFT 16 | 53 | + via enqcmd |
54 | + | 54 | + |
55 | +/* Descriptor flags */ | 55 | + |
56 | +#define IDXD_OP_FLAG_FENCE 0x0001 | 56 | +DSA Introduction |
57 | +#define IDXD_OP_FLAG_BOF 0x0002 | 57 | +---------------- |
58 | +#define IDXD_OP_FLAG_CRAV 0x0004 | 58 | +Intel Data Streaming Accelerator (DSA) is a high-performance data copy and |
59 | +#define IDXD_OP_FLAG_RCR 0x0008 | 59 | +transformation accelerator that is integrated in Intel Xeon processors, |
60 | +#define IDXD_OP_FLAG_RCI 0x0010 | 60 | +targeted for optimizing streaming data movement and transformation operations |
61 | +#define IDXD_OP_FLAG_CRSTS 0x0020 | 61 | +common with applications for high-performance storage, networking, persistent |
62 | +#define IDXD_OP_FLAG_CR 0x0080 | 62 | +memory, and various data processing applications. |
63 | +#define IDXD_OP_FLAG_CC 0x0100 | 63 | + |
64 | +#define IDXD_OP_FLAG_ADDR1_TCS 0x0200 | 64 | +For more ``DSA`` introduction, please refer to `DSA Introduction |
65 | +#define IDXD_OP_FLAG_ADDR2_TCS 0x0400 | 65 | +<https://www.intel.com/content/www/us/en/products/docs/accelerator-engines/data-streaming-accelerator.html>`_ |
66 | +#define IDXD_OP_FLAG_ADDR3_TCS 0x0800 | 66 | + |
67 | +#define IDXD_OP_FLAG_CR_TCS 0x1000 | 67 | +For ``DSA`` specification, please refer to `DSA Specification |
68 | +#define IDXD_OP_FLAG_STORD 0x2000 | 68 | +<https://cdrdv2-public.intel.com/671116/341204-intel-data-streaming-accelerator-spec.pdf>`_ |
69 | +#define IDXD_OP_FLAG_DRDBK 0x4000 | 69 | + |
70 | +#define IDXD_OP_FLAG_DSTS 0x8000 | 70 | +For ``DSA`` user guide, please refer to `DSA User Guide |
71 | + | 71 | +<https://www.intel.com/content/www/us/en/content-details/759709/intel-data-streaming-accelerator-user-guide.html>`_ |
72 | +/* IAX */ | 72 | + |
73 | +#define IDXD_OP_FLAG_RD_SRC2_AECS 0x010000 | 73 | +DSA Device Management |
74 | +#define IDXD_OP_FLAG_RD_SRC2_2ND 0x020000 | 74 | +--------------------- |
75 | +#define IDXD_OP_FLAG_WR_SRC2_AECS_COMP 0x040000 | 75 | + |
76 | +#define IDXD_OP_FLAG_WR_SRC2_AECS_OVFL 0x080000 | 76 | +The number of ``DSA`` devices will vary depending on the Xeon product model. |
77 | +#define IDXD_OP_FLAG_SRC2_STS 0x100000 | 77 | +On a ``SPR`` server, there can be a maximum of 8 ``DSA`` devices, with up to |
78 | +#define IDXD_OP_FLAG_CRC_RFC3720 0x200000 | 78 | +4 devices per socket. |
79 | + | 79 | + |
80 | +/* Opcode */ | 80 | +By default, all ``DSA`` devices are disabled and need to be configured and |
81 | +enum dsa_opcode { | 81 | +enabled by users manually. |
82 | + DSA_OPCODE_NOOP = 0, | 82 | + |
83 | + DSA_OPCODE_BATCH, | 83 | +Check the number of devices through the following command |
84 | + DSA_OPCODE_DRAIN, | 84 | + |
85 | + DSA_OPCODE_MEMMOVE, | 85 | +.. code-block:: shell |
86 | + DSA_OPCODE_MEMFILL, | 86 | + |
87 | + DSA_OPCODE_COMPARE, | 87 | + #lspci -d 8086:0b25 |
88 | + DSA_OPCODE_COMPVAL, | 88 | + 6a:01.0 System peripheral: Intel Corporation Device 0b25 |
89 | + DSA_OPCODE_CR_DELTA, | 89 | + 6f:01.0 System peripheral: Intel Corporation Device 0b25 |
90 | + DSA_OPCODE_AP_DELTA, | 90 | + 74:01.0 System peripheral: Intel Corporation Device 0b25 |
91 | + DSA_OPCODE_DUALCAST, | 91 | + 79:01.0 System peripheral: Intel Corporation Device 0b25 |
92 | + DSA_OPCODE_CRCGEN = 0x10, | 92 | + e7:01.0 System peripheral: Intel Corporation Device 0b25 |
93 | + DSA_OPCODE_COPY_CRC, | 93 | + ec:01.0 System peripheral: Intel Corporation Device 0b25 |
94 | + DSA_OPCODE_DIF_CHECK, | 94 | + f1:01.0 System peripheral: Intel Corporation Device 0b25 |
95 | + DSA_OPCODE_DIF_INS, | 95 | + f6:01.0 System peripheral: Intel Corporation Device 0b25 |
96 | + DSA_OPCODE_DIF_STRP, | 96 | + |
97 | + DSA_OPCODE_DIF_UPDT, | 97 | + |
98 | + DSA_OPCODE_CFLUSH = 0x20, | 98 | +DSA Device Configuration And Enabling |
99 | +}; | 99 | +------------------------------------- |
100 | + | 100 | + |
101 | +enum iax_opcode { | 101 | +The ``accel-config`` tool is used to enable ``DSA`` devices and configure |
102 | + IAX_OPCODE_NOOP = 0, | 102 | +``DSA`` hardware resources(work queues and engines). One ``DSA`` device |
103 | + IAX_OPCODE_DRAIN = 2, | 103 | +has 8 work queues and 4 processing engines, multiple engines can be assigned |
104 | + IAX_OPCODE_MEMMOVE, | 104 | +to a work queue via ``group`` attribute. |
105 | + IAX_OPCODE_DECOMPRESS = 0x42, | 105 | + |
106 | + IAX_OPCODE_COMPRESS, | 106 | +For ``accel-config`` installation, please refer to `accel-config installation |
107 | + IAX_OPCODE_CRC64, | 107 | +<https://github.com/intel/idxd-config>`_ |
108 | + IAX_OPCODE_ZERO_DECOMP_32 = 0x48, | 108 | + |
109 | + IAX_OPCODE_ZERO_DECOMP_16, | 109 | +One example of configuring and enabling an ``DSA`` device. |
110 | + IAX_OPCODE_ZERO_COMP_32 = 0x4c, | 110 | + |
111 | + IAX_OPCODE_ZERO_COMP_16, | 111 | +.. code-block:: shell |
112 | + IAX_OPCODE_SCAN = 0x50, | 112 | + |
113 | + IAX_OPCODE_SET_MEMBER, | 113 | + #accel-config config-engine dsa0/engine0.0 -g 0 |
114 | + IAX_OPCODE_EXTRACT, | 114 | + #accel-config config-engine dsa0/engine0.1 -g 0 |
115 | + IAX_OPCODE_SELECT, | 115 | + #accel-config config-engine dsa0/engine0.2 -g 0 |
116 | + IAX_OPCODE_RLE_BURST, | 116 | + #accel-config config-engine dsa0/engine0.3 -g 0 |
117 | + IAX_OPCODE_FIND_UNIQUE, | 117 | + #accel-config config-wq dsa0/wq0.0 -g 0 -s 128 -p 10 -b 1 -t 128 -m shared -y user -n app1 -d user |
118 | + IAX_OPCODE_EXPAND, | 118 | + #accel-config enable-device dsa0 |
119 | +}; | 119 | + #accel-config enable-wq dsa0/wq0.0 |
120 | + | 120 | + |
121 | +/* Completion record status */ | 121 | +- The ``DSA`` device index is 0, use ``ls -lh /sys/bus/dsa/devices/dsa*`` |
122 | +enum dsa_completion_status { | 122 | + command to query the ``DSA`` device index. |
123 | + DSA_COMP_NONE = 0, | 123 | + |
124 | + DSA_COMP_SUCCESS, | 124 | +- 4 engines and 1 work queue are configured in group 0, so that all zero-page |
125 | + DSA_COMP_SUCCESS_PRED, | 125 | + detection jobs submitted to this work queue can be processed by all engines |
126 | + DSA_COMP_PAGE_FAULT_NOBOF, | 126 | + simultaneously. |
127 | + DSA_COMP_PAGE_FAULT_IR, | 127 | + |
128 | + DSA_COMP_BATCH_FAIL, | 128 | +- Set work queue attributes including the work mode, work queue size and so on. |
129 | + DSA_COMP_BATCH_PAGE_FAULT, | 129 | + |
130 | + DSA_COMP_DR_OFFSET_NOINC, | 130 | +- Enable the ``dsa0`` device and work queue ``dsa0/wq0.0`` |
131 | + DSA_COMP_DR_OFFSET_ERANGE, | 131 | + |
132 | + DSA_COMP_DIF_ERR, | 132 | +.. note:: |
133 | + DSA_COMP_BAD_OPCODE = 0x10, | 133 | + |
134 | + DSA_COMP_INVALID_FLAGS, | 134 | + 1. ``DSA`` device driver is Intel Data Accelerator Driver (idxd), it is |
135 | + DSA_COMP_NOZERO_RESERVE, | 135 | + recommended that the minimum version of Linux kernel is 5.18. |
136 | + DSA_COMP_XFER_ERANGE, | 136 | + |
137 | + DSA_COMP_DESC_CNT_ERANGE, | 137 | + 2. Only ``DSA`` shared work queue mode is supported, it needs to add |
138 | + DSA_COMP_DR_ERANGE, | 138 | + ``"intel_iommu=on,sm_on"`` parameter to kernel command line. |
139 | + DSA_COMP_OVERLAP_BUFFERS, | 139 | + |
140 | + DSA_COMP_DCAST_ERR, | 140 | +For more detailed configuration, please refer to `DSA Configuration Samples |
141 | + DSA_COMP_DESCLIST_ALIGN, | 141 | +<https://github.com/intel/idxd-config/tree/stable/Documentation/accfg>`_ |
142 | + DSA_COMP_INT_HANDLE_INVAL, | 142 | + |
143 | + DSA_COMP_CRA_XLAT, | 143 | + |
144 | + DSA_COMP_CRA_ALIGN, | 144 | +Performances |
145 | + DSA_COMP_ADDR_ALIGN, | 145 | +============ |
146 | + DSA_COMP_PRIV_BAD, | 146 | +We use two Intel 4th generation Xeon servers for testing. |
147 | + DSA_COMP_TRAFFIC_CLASS_CONF, | 147 | + |
148 | + DSA_COMP_PFAULT_RDBA, | 148 | +:: |
149 | + DSA_COMP_HW_ERR1, | 149 | + |
150 | + DSA_COMP_HW_ERR_DRB, | 150 | + Architecture: x86_64 |
151 | + DSA_COMP_TRANSLATION_FAIL, | 151 | + CPU(s): 192 |
152 | +}; | 152 | + Thread(s) per core: 2 |
153 | + | 153 | + Core(s) per socket: 48 |
154 | +enum iax_completion_status { | 154 | + Socket(s): 2 |
155 | + IAX_COMP_NONE = 0, | 155 | + NUMA node(s): 2 |
156 | + IAX_COMP_SUCCESS, | 156 | + Vendor ID: GenuineIntel |
157 | + IAX_COMP_PAGE_FAULT_IR = 0x04, | 157 | + CPU family: 6 |
158 | + IAX_COMP_ANALYTICS_ERROR = 0x0a, | 158 | + Model: 143 |
159 | + IAX_COMP_OUTBUF_OVERFLOW, | 159 | + Model name: Intel(R) Xeon(R) Platinum 8457C |
160 | + IAX_COMP_BAD_OPCODE = 0x10, | 160 | + Stepping: 8 |
161 | + IAX_COMP_INVALID_FLAGS, | 161 | + CPU MHz: 2538.624 |
162 | + IAX_COMP_NOZERO_RESERVE, | 162 | + CPU max MHz: 3800.0000 |
163 | + IAX_COMP_INVALID_SIZE, | 163 | + CPU min MHz: 800.0000 |
164 | + IAX_COMP_OVERLAP_BUFFERS = 0x16, | 164 | + |
165 | + IAX_COMP_INT_HANDLE_INVAL = 0x19, | 165 | +We perform multifd live migration with below setup: |
166 | + IAX_COMP_CRA_XLAT, | 166 | + |
167 | + IAX_COMP_CRA_ALIGN, | 167 | +1. VM has 100GB memory. |
168 | + IAX_COMP_ADDR_ALIGN, | 168 | + |
169 | + IAX_COMP_PRIV_BAD, | 169 | +2. Use the new migration option multifd-set-normal-page-ratio to control the |
170 | + IAX_COMP_TRAFFIC_CLASS_CONF, | 170 | + total size of the payload sent over the network. |
171 | + IAX_COMP_PFAULT_RDBA, | 171 | + |
172 | + IAX_COMP_HW_ERR1, | 172 | +3. Use 8 multifd channels. |
173 | + IAX_COMP_HW_ERR_DRB, | 173 | + |
174 | + IAX_COMP_TRANSLATION_FAIL, | 174 | +4. Use tcp for live migration. |
175 | + IAX_COMP_PRS_TIMEOUT, | 175 | + |
176 | + IAX_COMP_WATCHDOG, | 176 | +5. Use CPU to perform zero page checking as the baseline. |
177 | + IAX_COMP_INVALID_COMP_FLAG = 0x30, | 177 | + |
178 | + IAX_COMP_INVALID_FILTER_FLAG, | 178 | +6. Use one DSA device to offload zero page checking to compare with the baseline. |
179 | + IAX_COMP_INVALID_INPUT_SIZE, | 179 | + |
180 | + IAX_COMP_INVALID_NUM_ELEMS, | 180 | +7. Use "perf sched record" and "perf sched timehist" to analyze CPU usage. |
181 | + IAX_COMP_INVALID_SRC1_WIDTH, | 181 | + |
182 | + IAX_COMP_INVALID_INVERT_OUT, | 182 | + |
183 | +}; | 183 | +A) Scenario 1: 50% (50GB) normal pages on an 100GB vm |
184 | + | 184 | +----------------------------------------------------- |
185 | +#define DSA_COMP_STATUS_MASK 0x7f | 185 | + |
186 | +#define DSA_COMP_STATUS_WRITE 0x80 | 186 | +:: |
187 | + | 187 | + |
188 | +struct dsa_hw_desc { | 188 | + CPU usage |
189 | + uint32_t pasid:20; | 189 | + |
190 | + uint32_t rsvd:11; | 190 | + |---------------|---------------|---------------|---------------| |
191 | + uint32_t priv:1; | 191 | + | |comm |runtime(msec) |totaltime(msec)| |
192 | + uint32_t flags:24; | 192 | + |---------------|---------------|---------------|---------------| |
193 | + uint32_t opcode:8; | 193 | + |Baseline |live_migration |5657.58 | | |
194 | + uint64_t completion_addr; | 194 | + | |multifdsend_0 |3931.563 | | |
195 | + union { | 195 | + | |multifdsend_1 |4405.273 | | |
196 | + uint64_t src_addr; | 196 | + | |multifdsend_2 |3941.968 | | |
197 | + uint64_t rdback_addr; | 197 | + | |multifdsend_3 |5032.975 | | |
198 | + uint64_t pattern; | 198 | + | |multifdsend_4 |4533.865 | | |
199 | + uint64_t desc_list_addr; | 199 | + | |multifdsend_5 |4530.461 | | |
200 | + }; | 200 | + | |multifdsend_6 |5171.916 | | |
201 | + union { | 201 | + | |multifdsend_7 |4722.769 |41922 | |
202 | + uint64_t dst_addr; | 202 | + |---------------|---------------|---------------|---------------| |
203 | + uint64_t rdback_addr2; | 203 | + |DSA |live_migration |6129.168 | | |
204 | + uint64_t src2_addr; | 204 | + | |multifdsend_0 |2954.717 | | |
205 | + uint64_t comp_pattern; | 205 | + | |multifdsend_1 |2766.359 | | |
206 | + }; | 206 | + | |multifdsend_2 |2853.519 | | |
207 | + union { | 207 | + | |multifdsend_3 |2740.717 | | |
208 | + uint32_t xfer_size; | 208 | + | |multifdsend_4 |2824.169 | | |
209 | + uint32_t desc_count; | 209 | + | |multifdsend_5 |2966.908 | | |
210 | + }; | 210 | + | |multifdsend_6 |2611.137 | | |
211 | + uint16_t int_handle; | 211 | + | |multifdsend_7 |3114.732 | | |
212 | + uint16_t rsvd1; | 212 | + | |dsa_completion |3612.564 |32568 | |
213 | + union { | 213 | + |---------------|---------------|---------------|---------------| |
214 | + uint8_t expected_res; | 214 | + |
215 | + /* create delta record */ | 215 | +Baseline total runtime is calculated by adding up all multifdsend_X |
216 | + struct { | 216 | +and live_migration threads runtime. DSA offloading total runtime is |
217 | + uint64_t delta_addr; | 217 | +calculated by adding up all multifdsend_X, live_migration and |
218 | + uint32_t max_delta_size; | 218 | +dsa_completion threads runtime. 41922 msec VS 32568 msec runtime and |
219 | + uint32_t delt_rsvd; | 219 | +that is 23% total CPU usage savings. |
220 | + uint8_t expected_res_mask; | 220 | + |
221 | + }; | 221 | +:: |
222 | + uint32_t delta_rec_size; | 222 | + |
223 | + uint64_t dest2; | 223 | + Latency |
224 | + /* CRC */ | 224 | + |---------------|---------------|---------------|---------------|---------------|---------------| |
225 | + struct { | 225 | + | |total time |down time |throughput |transferred-ram|total-ram | |
226 | + uint32_t crc_seed; | 226 | + |---------------|---------------|---------------|---------------|---------------|---------------| |
227 | + uint32_t crc_rsvd; | 227 | + |Baseline |10343 ms |161 ms |41007.00 mbps |51583797 kb |102400520 kb | |
228 | + uint64_t seed_addr; | 228 | + |---------------|---------------|---------------|---------------|-------------------------------| |
229 | + }; | 229 | + |DSA offload |9535 ms |135 ms |46554.40 mbps |53947545 kb |102400520 kb | |
230 | + /* DIF check or strip */ | 230 | + |---------------|---------------|---------------|---------------|---------------|---------------| |
231 | + struct { | 231 | + |
232 | + uint8_t src_dif_flags; | 232 | +Total time is 8% faster and down time is 16% faster. |
233 | + uint8_t dif_chk_res; | 233 | + |
234 | + uint8_t dif_chk_flags; | 234 | + |
235 | + uint8_t dif_chk_res2[5]; | 235 | +B) Scenario 2: 100% (100GB) zero pages on an 100GB vm |
236 | + uint32_t chk_ref_tag_seed; | 236 | +----------------------------------------------------- |
237 | + uint16_t chk_app_tag_mask; | 237 | + |
238 | + uint16_t chk_app_tag_seed; | 238 | +:: |
239 | + }; | 239 | + |
240 | + /* DIF insert */ | 240 | + CPU usage |
241 | + struct { | 241 | + |---------------|---------------|---------------|---------------| |
242 | + uint8_t dif_ins_res; | 242 | + | |comm |runtime(msec) |totaltime(msec)| |
243 | + uint8_t dest_dif_flag; | 243 | + |---------------|---------------|---------------|---------------| |
244 | + uint8_t dif_ins_flags; | 244 | + |Baseline |live_migration |4860.718 | | |
245 | + uint8_t dif_ins_res2[13]; | 245 | + | |multifdsend_0 |748.875 | | |
246 | + uint32_t ins_ref_tag_seed; | 246 | + | |multifdsend_1 |898.498 | | |
247 | + uint16_t ins_app_tag_mask; | 247 | + | |multifdsend_2 |787.456 | | |
248 | + uint16_t ins_app_tag_seed; | 248 | + | |multifdsend_3 |764.537 | | |
249 | + }; | 249 | + | |multifdsend_4 |785.687 | | |
250 | + /* DIF update */ | 250 | + | |multifdsend_5 |756.941 | | |
251 | + struct { | 251 | + | |multifdsend_6 |774.084 | | |
252 | + uint8_t src_upd_flags; | 252 | + | |multifdsend_7 |782.900 |11154 | |
253 | + uint8_t upd_dest_flags; | 253 | + |---------------|---------------|-------------------------------| |
254 | + uint8_t dif_upd_flags; | 254 | + |DSA offloading |live_migration |3846.976 | | |
255 | + uint8_t dif_upd_res[5]; | 255 | + | |multifdsend_0 |191.880 | | |
256 | + uint32_t src_ref_tag_seed; | 256 | + | |multifdsend_1 |166.331 | | |
257 | + uint16_t src_app_tag_mask; | 257 | + | |multifdsend_2 |168.528 | | |
258 | + uint16_t src_app_tag_seed; | 258 | + | |multifdsend_3 |197.831 | | |
259 | + uint32_t dest_ref_tag_seed; | 259 | + | |multifdsend_4 |169.580 | | |
260 | + uint16_t dest_app_tag_mask; | 260 | + | |multifdsend_5 |167.984 | | |
261 | + uint16_t dest_app_tag_seed; | 261 | + | |multifdsend_6 |198.042 | | |
262 | + }; | 262 | + | |multifdsend_7 |170.624 | | |
263 | + | 263 | + | |dsa_completion |3428.669 |8700 | |
264 | + uint8_t op_specific[24]; | 264 | + |---------------|---------------|---------------|---------------| |
265 | + }; | 265 | + |
266 | +} __attribute__((packed)); | 266 | +Baseline total runtime is 11154 msec and DSA offloading total runtime is |
267 | + | 267 | +8700 msec. That is 22% CPU savings. |
268 | +struct iax_hw_desc { | 268 | + |
269 | + uint32_t pasid:20; | 269 | +:: |
270 | + uint32_t rsvd:11; | 270 | + |
271 | + uint32_t priv:1; | 271 | + Latency |
272 | + uint32_t flags:24; | 272 | + |--------------------------------------------------------------------------------------------| |
273 | + uint32_t opcode:8; | 273 | + | |total time |down time |throughput |transferred-ram|total-ram | |
274 | + uint64_t completion_addr; | 274 | + |---------------|---------------|---------------|---------------|---------------|------------| |
275 | + uint64_t src1_addr; | 275 | + |Baseline |4867 ms |20 ms |1.51 mbps |565 kb |102400520 kb| |
276 | + uint64_t dst_addr; | 276 | + |---------------|---------------|---------------|---------------|----------------------------| |
277 | + uint32_t src1_size; | 277 | + |DSA offload |3888 ms |18 ms |1.89 mbps |565 kb |102400520 kb| |
278 | + uint16_t int_handle; | 278 | + |---------------|---------------|---------------|---------------|---------------|------------| |
279 | + union { | 279 | + |
280 | + uint16_t compr_flags; | 280 | +Total time 20% faster and down time 10% faster. |
281 | + uint16_t decompr_flags; | 281 | + |
282 | + }; | 282 | + |
283 | + uint64_t src2_addr; | 283 | +How To Use DSA In Migration |
284 | + uint32_t max_dst_size; | 284 | +=========================== |
285 | + uint32_t src2_size; | 285 | + |
286 | + uint32_t filter_flags; | 286 | +The migration parameter ``accel-path`` is used to specify the resource |
287 | + uint32_t num_inputs; | 287 | +allocation for DSA. After the user configures |
288 | +} __attribute__((packed)); | 288 | +``zero-page-detection=dsa-accel``, one or more DSA work queues need to be |
289 | + | 289 | +specified for migration. |
290 | +struct dsa_raw_desc { | 290 | + |
291 | + uint64_t field[8]; | 291 | +The following example shows two DSA work queues for zero page detection |
292 | +} __attribute__((packed)); | 292 | + |
293 | + | 293 | +.. code-block:: shell |
294 | +/* | 294 | + |
295 | + * The status field will be modified by hardware, therefore it should be | 295 | + migrate_set_parameter zero-page-detection=dsa-accel |
296 | + * volatile and prevent the compiler from optimize the read. | 296 | + migrate_set_parameter accel-path=dsa:/dev/dsa/wq0.0 dsa:/dev/dsa/wq1.0 |
297 | + */ | 297 | + |
298 | +struct dsa_completion_record { | 298 | +.. note:: |
299 | + volatile uint8_t status; | 299 | + |
300 | + union { | 300 | + Accessing DSA resources requires ``sudo`` command or ``root`` privileges |
301 | + uint8_t result; | 301 | + by default. Administrators can modify the DSA device node ownership |
302 | + uint8_t dif_status; | 302 | + so that QEMU can use DSA with specified user permissions. |
303 | + }; | 303 | + |
304 | + uint16_t rsvd; | 304 | + For example: |
305 | + uint32_t bytes_completed; | 305 | + |
306 | + uint64_t fault_addr; | 306 | + #chown -R qemu /dev/dsa |
307 | + union { | 307 | + |
308 | + /* common record */ | 308 | diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst |
309 | + struct { | 309 | index XXXXXXX..XXXXXXX 100644 |
310 | + uint32_t invalid_flags:24; | 310 | --- a/docs/devel/migration/features.rst |
311 | + uint32_t rsvd2:8; | 311 | +++ b/docs/devel/migration/features.rst |
312 | + }; | 312 | @@ -XXX,XX +XXX,XX @@ Migration has plenty of features to support different use cases. |
313 | + | 313 | qpl-compression |
314 | + uint32_t delta_rec_size; | 314 | uadk-compression |
315 | + uint64_t crc_val; | 315 | qatzip-compression |
316 | + | 316 | + dsa-zero-page-detection |
317 | + /* DIF check & strip */ | ||
318 | + struct { | ||
319 | + uint32_t dif_chk_ref_tag; | ||
320 | + uint16_t dif_chk_app_tag_mask; | ||
321 | + uint16_t dif_chk_app_tag; | ||
322 | + }; | ||
323 | + | ||
324 | + /* DIF insert */ | ||
325 | + struct { | ||
326 | + uint64_t dif_ins_res; | ||
327 | + uint32_t dif_ins_ref_tag; | ||
328 | + uint16_t dif_ins_app_tag_mask; | ||
329 | + uint16_t dif_ins_app_tag; | ||
330 | + }; | ||
331 | + | ||
332 | + /* DIF update */ | ||
333 | + struct { | ||
334 | + uint32_t dif_upd_src_ref_tag; | ||
335 | + uint16_t dif_upd_src_app_tag_mask; | ||
336 | + uint16_t dif_upd_src_app_tag; | ||
337 | + uint32_t dif_upd_dest_ref_tag; | ||
338 | + uint16_t dif_upd_dest_app_tag_mask; | ||
339 | + uint16_t dif_upd_dest_app_tag; | ||
340 | + }; | ||
341 | + | ||
342 | + uint8_t op_specific[16]; | ||
343 | + }; | ||
344 | +} __attribute__((packed)); | ||
345 | + | ||
346 | +struct dsa_raw_completion_record { | ||
347 | + uint64_t field[4]; | ||
348 | +} __attribute__((packed)); | ||
349 | + | ||
350 | +struct iax_completion_record { | ||
351 | + volatile uint8_t status; | ||
352 | + uint8_t error_code; | ||
353 | + uint16_t rsvd; | ||
354 | + uint32_t bytes_completed; | ||
355 | + uint64_t fault_addr; | ||
356 | + uint32_t invalid_flags; | ||
357 | + uint32_t rsvd2; | ||
358 | + uint32_t output_size; | ||
359 | + uint8_t output_bits; | ||
360 | + uint8_t rsvd3; | ||
361 | + uint16_t xor_csum; | ||
362 | + uint32_t crc; | ||
363 | + uint32_t min; | ||
364 | + uint32_t max; | ||
365 | + uint32_t sum; | ||
366 | + uint64_t rsvd4[2]; | ||
367 | +} __attribute__((packed)); | ||
368 | + | ||
369 | +struct iax_raw_completion_record { | ||
370 | + uint64_t field[8]; | ||
371 | +} __attribute__((packed)); | ||
372 | + | ||
373 | +#endif | ||
374 | -- | 317 | -- |
375 | 2.30.2 | 318 | Yichen Wang | diff view generated by jsdifflib |
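
As a quick cross-check on the CPU-usage tables in the documentation patch above: the quoted savings are simply the ratio of the summed per-thread runtimes (live_migration plus the multifdsend_X threads, plus dsa_completion in the offload case). A tiny standalone helper, with the totals copied from the two scenarios, reproduces the roughly 22-23% figures; the code below is illustrative only and not part of the series:

    #include <stdio.h>

    /* Totals are in msec, taken from the scenario tables above. */
    static double cpu_savings_pct(double baseline_total, double offload_total)
    {
        return 100.0 * (baseline_total - offload_total) / baseline_total;
    }

    int main(void)
    {
        printf("scenario 1 (50%% zero pages):  %.1f%%\n",
               cpu_savings_pct(41922.0, 32568.0));   /* ~22.3% */
        printf("scenario 2 (100%% zero pages): %.1f%%\n",
               cpu_savings_pct(11154.0, 8700.0));    /* ~22.0% */
        return 0;
    }
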
Deleted patch | |||
---|---|---|---|
1 | Multifd sender thread performs zero page checking. If a page is | ||
2 | a zero page, only the page's metadata is sent to the receiver. | ||
3 | If a page is a normal page, the entire page's content is sent to | ||
4 | the receiver. This change adds a test hook to set the normal page | ||
5 | ratio. A zero page will be forced to be sent as a normal page. This | ||
6 | is useful for live migration performance analysis and optimization. | ||
7 | 1 | ||
8 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | ||
9 | --- | ||
10 | migration/options.c | 31 +++++++++++++++++++++++++++++++ | ||
11 | migration/options.h | 1 + | ||
12 | qapi/migration.json | 18 +++++++++++++++--- | ||
13 | 3 files changed, 47 insertions(+), 3 deletions(-) | ||
14 | |||
15 | diff --git a/migration/options.c b/migration/options.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/migration/options.c | ||
18 | +++ b/migration/options.c | ||
19 | @@ -XXX,XX +XXX,XX @@ | ||
20 | #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 | ||
21 | #define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 | ||
22 | |||
23 | +/* | ||
24 | + * Parameter for multifd normal page test hook. | ||
25 | + */ | ||
26 | +#define DEFAULT_MIGRATE_MULTIFD_NORMAL_PAGE_RATIO 101 | ||
27 | + | ||
28 | #define DEFINE_PROP_MIG_CAP(name, x) \ | ||
29 | DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) | ||
30 | |||
31 | @@ -XXX,XX +XXX,XX @@ Property migration_properties[] = { | ||
32 | MIG_MODE_NORMAL), | ||
33 | DEFINE_PROP_STRING("multifd-dsa-accel", MigrationState, | ||
34 | parameters.multifd_dsa_accel), | ||
35 | + DEFINE_PROP_UINT8("multifd-normal-page-ratio", MigrationState, | ||
36 | + parameters.multifd_normal_page_ratio, | ||
37 | + DEFAULT_MIGRATE_MULTIFD_NORMAL_PAGE_RATIO), | ||
38 | |||
39 | /* Migration capabilities */ | ||
40 | DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), | ||
41 | @@ -XXX,XX +XXX,XX @@ int migrate_multifd_channels(void) | ||
42 | return s->parameters.multifd_channels; | ||
43 | } | ||
44 | |||
45 | +uint8_t migrate_multifd_normal_page_ratio(void) | ||
46 | +{ | ||
47 | + MigrationState *s = migrate_get_current(); | ||
48 | + return s->parameters.multifd_normal_page_ratio; | ||
49 | +} | ||
50 | + | ||
51 | MultiFDCompression migrate_multifd_compression(void) | ||
52 | { | ||
53 | MigrationState *s = migrate_get_current(); | ||
54 | @@ -XXX,XX +XXX,XX @@ bool migrate_params_check(MigrationParameters *params, Error **errp) | ||
55 | return false; | ||
56 | } | ||
57 | |||
58 | + if (params->has_multifd_normal_page_ratio && | ||
59 | + params->multifd_normal_page_ratio > 100) { | ||
60 | + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, | ||
61 | + "multifd_normal_page_ratio", | ||
62 | + "a value between 0 and 100"); | ||
63 | + return false; | ||
64 | + } | ||
65 | + | ||
66 | return true; | ||
67 | } | ||
68 | |||
69 | @@ -XXX,XX +XXX,XX @@ static void migrate_params_test_apply(MigrateSetParameters *params, | ||
70 | assert(params->multifd_dsa_accel->type == QTYPE_QSTRING); | ||
71 | dest->multifd_dsa_accel = params->multifd_dsa_accel->u.s; | ||
72 | } | ||
73 | + | ||
74 | + if (params->has_multifd_normal_page_ratio) { | ||
75 | + dest->has_multifd_normal_page_ratio = true; | ||
76 | + dest->multifd_normal_page_ratio = params->multifd_normal_page_ratio; | ||
77 | + } | ||
78 | } | ||
79 | |||
80 | static void migrate_params_apply(MigrateSetParameters *params, Error **errp) | ||
81 | @@ -XXX,XX +XXX,XX @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) | ||
82 | assert(params->multifd_dsa_accel->type == QTYPE_QSTRING); | ||
83 | s->parameters.multifd_dsa_accel = g_strdup(params->multifd_dsa_accel->u.s); | ||
84 | } | ||
85 | + | ||
86 | + if (params->has_multifd_normal_page_ratio) { | ||
87 | + s->parameters.multifd_normal_page_ratio = params->multifd_normal_page_ratio; | ||
88 | + } | ||
89 | } | ||
90 | |||
91 | void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) | ||
92 | diff --git a/migration/options.h b/migration/options.h | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/migration/options.h | ||
95 | +++ b/migration/options.h | ||
96 | @@ -XXX,XX +XXX,XX @@ const char *migrate_tls_creds(void); | ||
97 | const char *migrate_tls_hostname(void); | ||
98 | uint64_t migrate_xbzrle_cache_size(void); | ||
99 | const char *migrate_multifd_dsa_accel(void); | ||
100 | +uint8_t migrate_multifd_normal_page_ratio(void); | ||
101 | |||
102 | /* parameters setters */ | ||
103 | |||
104 | diff --git a/qapi/migration.json b/qapi/migration.json | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/qapi/migration.json | ||
107 | +++ b/qapi/migration.json | ||
108 | @@ -XXX,XX +XXX,XX @@ | ||
109 | # @multifd-dsa-accel: If enabled, use DSA accelerator offloading for | ||
110 | # certain memory operations. (since 8.2) | ||
111 | # | ||
112 | +# @multifd-normal-page-ratio: Test hook setting the normal page ratio. | ||
113 | +# (Since 8.2) | ||
114 | +# | ||
115 | # Features: | ||
116 | # | ||
117 | # @deprecated: Member @block-incremental is deprecated. Use | ||
118 | @@ -XXX,XX +XXX,XX @@ | ||
119 | 'block-bitmap-mapping', | ||
120 | { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] }, | ||
121 | 'vcpu-dirty-limit', | ||
122 | - 'mode'] } | ||
123 | + 'mode', | ||
124 | + 'multifd-normal-page-ratio'] } | ||
125 | |||
126 | ## | ||
127 | # @MigrateSetParameters: | ||
128 | @@ -XXX,XX +XXX,XX @@ | ||
129 | # @multifd-dsa-accel: If enabled, use DSA accelerator offloading for | ||
130 | # certain memory operations. (since 8.2) | ||
131 | # | ||
132 | +# @multifd-normal-page-ratio: Test hook setting the normal page ratio. | ||
133 | +# (Since 8.2) | ||
134 | +# | ||
135 | # Features: | ||
136 | # | ||
137 | # @deprecated: Member @block-incremental is deprecated. Use | ||
138 | @@ -XXX,XX +XXX,XX @@ | ||
139 | 'features': [ 'unstable' ] }, | ||
140 | '*vcpu-dirty-limit': 'uint64', | ||
141 | '*mode': 'MigMode', | ||
142 | - '*multifd-dsa-accel': 'StrOrNull'} } | ||
143 | + '*multifd-dsa-accel': 'StrOrNull', | ||
144 | + '*multifd-normal-page-ratio': 'uint8'} } | ||
145 | |||
146 | ## | ||
147 | # @migrate-set-parameters: | ||
148 | @@ -XXX,XX +XXX,XX @@ | ||
149 | # @multifd-dsa-accel: If enabled, use DSA accelerator offloading for | ||
150 | # certain memory operations. (since 8.2) | ||
151 | # | ||
152 | +# @multifd-normal-page-ratio: Test hook setting the normal page ratio. | ||
153 | +# (Since 8.2) | ||
154 | +# | ||
155 | # Features: | ||
156 | # | ||
157 | # @deprecated: Member @block-incremental is deprecated. Use | ||
158 | @@ -XXX,XX +XXX,XX @@ | ||
159 | 'features': [ 'unstable' ] }, | ||
160 | '*vcpu-dirty-limit': 'uint64', | ||
161 | '*mode': 'MigMode', | ||
162 | - '*multifd-dsa-accel': 'str'} } | ||
163 | + '*multifd-dsa-accel': 'str', | ||
164 | + '*multifd-normal-page-ratio': 'uint8'} } | ||
165 | |||
166 | ## | ||
167 | # @query-migrate-parameters: | ||
168 | -- | ||
169 | 2.30.2 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Test hook is disabled by default. To set it, a normal page ratio | ||
2 | between 0 and 100 is valid. If the ratio is set to 50, it means | ||
3 | at least 50% of all pages are sent as normal pages. | ||
4 | 1 | ||
5 | Set the option: | ||
6 | migrate_set_parameter multifd-normal-page-ratio 60 | ||
7 | |||
8 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | ||
9 | --- | ||
10 | include/qemu/dsa.h | 7 ++++++- | ||
11 | migration/migration-hmp-cmds.c | 7 +++++++ | ||
12 | migration/multifd.c | 33 +++++++++++++++++++++++++++++++++ | ||
13 | 3 files changed, 46 insertions(+), 1 deletion(-) | ||
14 | |||
15 | diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/include/qemu/dsa.h | ||
18 | +++ b/include/qemu/dsa.h | ||
19 | @@ -XXX,XX +XXX,XX @@ typedef struct buffer_zero_batch_task { | ||
20 | enum dsa_task_type task_type; | ||
21 | enum dsa_task_status status; | ||
22 | bool *results; | ||
23 | - int batch_size; | ||
24 | + uint32_t batch_size; | ||
25 | + // Set normal page ratio test hook. | ||
26 | + uint32_t normal_page_index; | ||
27 | + uint32_t normal_page_counter; | ||
28 | QSIMPLEQ_ENTRY(buffer_zero_batch_task) entry; | ||
29 | } buffer_zero_batch_task; | ||
30 | |||
31 | @@ -XXX,XX +XXX,XX @@ typedef struct buffer_zero_batch_task { | ||
32 | |||
33 | struct buffer_zero_batch_task { | ||
34 | bool *results; | ||
35 | + uint32_t normal_page_index; | ||
36 | + uint32_t normal_page_counter; | ||
37 | }; | ||
38 | |||
39 | #endif | ||
40 | diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/migration/migration-hmp-cmds.c | ||
43 | +++ b/migration/migration-hmp-cmds.c | ||
44 | @@ -XXX,XX +XXX,XX @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) | ||
45 | monitor_printf(mon, "%s: %s\n", | ||
46 | MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_DSA_ACCEL), | ||
47 | params->multifd_dsa_accel); | ||
48 | + monitor_printf(mon, "%s: %u\n", | ||
49 | + MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_NORMAL_PAGE_RATIO), | ||
50 | + params->multifd_normal_page_ratio); | ||
51 | |||
52 | if (params->has_block_bitmap_mapping) { | ||
53 | const BitmapMigrationNodeAliasList *bmnal; | ||
54 | @@ -XXX,XX +XXX,XX @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) | ||
55 | error_setg(&err, "The block-bitmap-mapping parameter can only be set " | ||
56 | "through QMP"); | ||
57 | break; | ||
58 | + case MIGRATION_PARAMETER_MULTIFD_NORMAL_PAGE_RATIO: | ||
59 | + p->has_multifd_normal_page_ratio = true; | ||
60 | + visit_type_uint8(v, param, &p->multifd_normal_page_ratio, &err); | ||
61 | + break; | ||
62 | case MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD: | ||
63 | p->has_x_vcpu_dirty_limit_period = true; | ||
64 | visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err); | ||
65 | diff --git a/migration/multifd.c b/migration/multifd.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/migration/multifd.c | ||
68 | +++ b/migration/multifd.c | ||
69 | @@ -XXX,XX +XXX,XX @@ int multifd_send_sync_main(QEMUFile *f) | ||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | +static void multifd_normal_page_test_hook(MultiFDSendParams *p) | ||
74 | +{ | ||
75 | + /* | ||
76 | + * The value is between 0 to 100. If the value is 10, it means at | ||
77 | + * least 10% of the pages are normal page. A zero page can be made | ||
78 | + * a normal page but not the other way around. | ||
79 | + */ | ||
80 | + uint8_t multifd_normal_page_ratio = | ||
81 | + migrate_multifd_normal_page_ratio(); | ||
82 | + struct buffer_zero_batch_task *batch_task = p->batch_task; | ||
83 | + | ||
84 | + // Set normal page test hook is disabled. | ||
85 | + if (multifd_normal_page_ratio > 100) { | ||
86 | + return; | ||
87 | + } | ||
88 | + | ||
89 | + for (int i = 0; i < p->pages->num; i++) { | ||
90 | + if (batch_task->normal_page_counter < multifd_normal_page_ratio) { | ||
91 | + // Turn a zero page into a normal page. | ||
92 | + batch_task->results[i] = false; | ||
93 | + } | ||
94 | + batch_task->normal_page_index++; | ||
95 | + batch_task->normal_page_counter++; | ||
96 | + | ||
97 | + if (batch_task->normal_page_index >= 100) { | ||
98 | + batch_task->normal_page_index = 0; | ||
99 | + batch_task->normal_page_counter = 0; | ||
100 | + } | ||
101 | + } | ||
102 | +} | ||
103 | + | ||
104 | static void set_page(MultiFDSendParams *p, bool zero_page, uint64_t offset) | ||
105 | { | ||
106 | RAMBlock *rb = p->pages->block; | ||
107 | @@ -XXX,XX +XXX,XX @@ static void multifd_zero_page_check(MultiFDSendParams *p) | ||
108 | set_normal_pages(p); | ||
109 | } | ||
110 | |||
111 | + multifd_normal_page_test_hook(p); | ||
112 | + | ||
113 | for (int i = 0; i < p->pages->num; i++) { | ||
114 | uint64_t offset = p->pages->offset[i]; | ||
115 | bool zero_page = p->batch_task->results[i]; | ||
116 | -- | ||
117 | 2.30.2 | ||
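For reference, the test hook above would typically be exercised by setting the new parameter before the migration starts, e.g. from HMP (a sketch; the parameter name matches the QAPI member added earlier in this series, and the value 10 is only illustrative):

    (qemu) migrate_set_parameter multifd-normal-page-ratio 10

With a ratio of 10, the first 10 results out of every 100 pages are flipped to "normal" before the pages are sent, so at least 10% of each batch goes through the normal-page path. The default value of 101 (above 100) leaves the hook disabled.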
Deleted patch | |||
---|---|---|---|
1 | The current multifd packet size is 128 * 4 KB. This change adds an | ||
2 | option to set the packet size. Both the sender and the receiver need | ||
3 | to set the same packet size for migration to work. | ||
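With this option, both sides would be configured with the same value before starting the migration, e.g. (a sketch; the HMP and QMP spellings assume the parameter is exposed exactly as in the QAPI change below, and 512 is only an example value):

    (qemu) migrate_set_parameter multifd-packet-size 512

or via QMP:

    { "execute": "migrate-set-parameters",
      "arguments": { "multifd-packet-size": 512 } }

Values outside the 128..1024 page range are rejected by migrate_params_check().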
4 | 1 | ||
5 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | ||
6 | --- | ||
7 | migration/options.c | 34 ++++++++++++++++++++++++++++++++++ | ||
8 | migration/options.h | 1 + | ||
9 | qapi/migration.json | 21 ++++++++++++++++++--- | ||
10 | 3 files changed, 53 insertions(+), 3 deletions(-) | ||
11 | |||
12 | diff --git a/migration/options.c b/migration/options.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/migration/options.c | ||
15 | +++ b/migration/options.c | ||
16 | @@ -XXX,XX +XXX,XX @@ | ||
17 | */ | ||
18 | #define DEFAULT_MIGRATE_MULTIFD_NORMAL_PAGE_RATIO 101 | ||
19 | |||
20 | +/* | ||
21 | + * Parameter for multifd packet size. | ||
22 | + */ | ||
23 | +#define DEFAULT_MIGRATE_MULTIFD_PACKET_SIZE 128 | ||
24 | +#define MAX_MIGRATE_MULTIFD_PACKET_SIZE 1024 | ||
25 | + | ||
26 | #define DEFINE_PROP_MIG_CAP(name, x) \ | ||
27 | DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) | ||
28 | |||
29 | @@ -XXX,XX +XXX,XX @@ Property migration_properties[] = { | ||
30 | DEFINE_PROP_UINT8("multifd-normal-page-ratio", MigrationState, | ||
31 | parameters.multifd_normal_page_ratio, | ||
32 | DEFAULT_MIGRATE_MULTIFD_NORMAL_PAGE_RATIO), | ||
33 | + DEFINE_PROP_SIZE("multifd-packet-size", MigrationState, | ||
34 | + parameters.multifd_packet_size, | ||
35 | + DEFAULT_MIGRATE_MULTIFD_PACKET_SIZE), | ||
36 | |||
37 | /* Migration capabilities */ | ||
38 | DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), | ||
39 | @@ -XXX,XX +XXX,XX @@ uint8_t migrate_multifd_normal_page_ratio(void) | ||
40 | return s->parameters.multifd_normal_page_ratio; | ||
41 | } | ||
42 | |||
43 | +uint64_t migrate_multifd_packet_size(void) | ||
44 | +{ | ||
45 | + MigrationState *s = migrate_get_current(); | ||
46 | + | ||
47 | + return s->parameters.multifd_packet_size; | ||
48 | +} | ||
49 | + | ||
50 | MultiFDCompression migrate_multifd_compression(void) | ||
51 | { | ||
52 | MigrationState *s = migrate_get_current(); | ||
53 | @@ -XXX,XX +XXX,XX @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) | ||
54 | params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; | ||
55 | params->has_block_incremental = true; | ||
56 | params->block_incremental = s->parameters.block_incremental; | ||
57 | + params->has_multifd_packet_size = true; | ||
58 | + params->multifd_packet_size = s->parameters.multifd_packet_size; | ||
59 | params->has_multifd_channels = true; | ||
60 | params->multifd_channels = s->parameters.multifd_channels; | ||
61 | params->has_multifd_compression = true; | ||
62 | @@ -XXX,XX +XXX,XX @@ void migrate_params_init(MigrationParameters *params) | ||
63 | params->has_downtime_limit = true; | ||
64 | params->has_x_checkpoint_delay = true; | ||
65 | params->has_block_incremental = true; | ||
66 | + params->has_multifd_packet_size = true; | ||
67 | params->has_multifd_channels = true; | ||
68 | params->has_multifd_compression = true; | ||
69 | params->has_multifd_zlib_level = true; | ||
70 | @@ -XXX,XX +XXX,XX @@ bool migrate_params_check(MigrationParameters *params, Error **errp) | ||
71 | |||
72 | /* x_checkpoint_delay is now always positive */ | ||
73 | |||
74 | + if (params->has_multifd_packet_size && | ||
75 | + ((params->multifd_packet_size < DEFAULT_MIGRATE_MULTIFD_PACKET_SIZE) || | ||
76 | + (params->multifd_packet_size > MAX_MIGRATE_MULTIFD_PACKET_SIZE))) { | ||
77 | + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, | ||
78 | + "multifd_packet_size", | ||
79 | + "a value between 128 and 1024"); | ||
80 | + return false; | ||
81 | + } | ||
82 | + | ||
83 | if (params->has_multifd_channels && (params->multifd_channels < 1)) { | ||
84 | error_setg(errp, QERR_INVALID_PARAMETER_VALUE, | ||
85 | "multifd_channels", | ||
86 | @@ -XXX,XX +XXX,XX @@ static void migrate_params_test_apply(MigrateSetParameters *params, | ||
87 | if (params->has_block_incremental) { | ||
88 | dest->block_incremental = params->block_incremental; | ||
89 | } | ||
90 | + if (params->has_multifd_packet_size) { | ||
91 | + dest->multifd_packet_size = params->multifd_packet_size; | ||
92 | + } | ||
93 | if (params->has_multifd_channels) { | ||
94 | dest->multifd_channels = params->multifd_channels; | ||
95 | } | ||
96 | @@ -XXX,XX +XXX,XX @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) | ||
97 | " use blockdev-mirror with NBD instead"); | ||
98 | s->parameters.block_incremental = params->block_incremental; | ||
99 | } | ||
100 | + if (params->has_multifd_packet_size) { | ||
101 | + s->parameters.multifd_packet_size = params->multifd_packet_size; | ||
102 | + } | ||
103 | if (params->has_multifd_channels) { | ||
104 | s->parameters.multifd_channels = params->multifd_channels; | ||
105 | } | ||
106 | diff --git a/migration/options.h b/migration/options.h | ||
107 | index XXXXXXX..XXXXXXX 100644 | ||
108 | --- a/migration/options.h | ||
109 | +++ b/migration/options.h | ||
110 | @@ -XXX,XX +XXX,XX @@ const char *migrate_tls_hostname(void); | ||
111 | uint64_t migrate_xbzrle_cache_size(void); | ||
112 | const char *migrate_multifd_dsa_accel(void); | ||
113 | uint8_t migrate_multifd_normal_page_ratio(void); | ||
114 | +uint64_t migrate_multifd_packet_size(void); | ||
115 | |||
116 | /* parameters setters */ | ||
117 | |||
118 | diff --git a/qapi/migration.json b/qapi/migration.json | ||
119 | index XXXXXXX..XXXXXXX 100644 | ||
120 | --- a/qapi/migration.json | ||
121 | +++ b/qapi/migration.json | ||
122 | @@ -XXX,XX +XXX,XX @@ | ||
123 | # @multifd-normal-page-ratio: Test hook setting the normal page ratio. | ||
124 | # (Since 8.2) | ||
125 | # | ||
126 | +# @multifd-packet-size: Packet size used to migrate data, expressed | ||
127 | +# as the number of pages in a packet. The default value is 128 | ||
128 | +# and the maximum value is 1024. (Since 8.2) | ||
129 | +# | ||
130 | # Features: | ||
131 | # | ||
132 | # @deprecated: Member @block-incremental is deprecated. Use | ||
133 | @@ -XXX,XX +XXX,XX @@ | ||
134 | { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] }, | ||
135 | 'vcpu-dirty-limit', | ||
136 | 'mode', | ||
137 | - 'multifd-normal-page-ratio'] } | ||
138 | + 'multifd-normal-page-ratio', | ||
139 | + 'multifd-packet-size'] } | ||
140 | |||
141 | ## | ||
142 | # @MigrateSetParameters: | ||
143 | @@ -XXX,XX +XXX,XX @@ | ||
144 | # @multifd-normal-page-ratio: Test hook setting the normal page ratio. | ||
145 | # (Since 8.2) | ||
146 | # | ||
147 | +# @multifd-packet-size: Packet size used to migrate data, expressed | ||
148 | +# as the number of pages in a packet. The default value is 128 | ||
149 | +# and the maximum value is 1024. (Since 8.2) | ||
150 | +# | ||
151 | # Features: | ||
152 | # | ||
153 | # @deprecated: Member @block-incremental is deprecated. Use | ||
154 | @@ -XXX,XX +XXX,XX @@ | ||
155 | '*vcpu-dirty-limit': 'uint64', | ||
156 | '*mode': 'MigMode', | ||
157 | '*multifd-dsa-accel': 'StrOrNull', | ||
158 | - '*multifd-normal-page-ratio': 'uint8'} } | ||
159 | + '*multifd-normal-page-ratio': 'uint8', | ||
160 | + '*multifd-packet-size' : 'uint64'} } | ||
161 | |||
162 | ## | ||
163 | # @migrate-set-parameters: | ||
164 | @@ -XXX,XX +XXX,XX @@ | ||
165 | # @multifd-normal-page-ratio: Test hook setting the normal page ratio. | ||
166 | # (Since 8.2) | ||
167 | # | ||
168 | +# @multifd-packet-size: Packet size used to migrate data, expressed | ||
169 | +# as the number of pages in a packet. The default value is 128 | ||
170 | +# and the maximum value is 1024. (Since 8.2) | ||
171 | +# | ||
172 | # Features: | ||
173 | # | ||
174 | # @deprecated: Member @block-incremental is deprecated. Use | ||
175 | @@ -XXX,XX +XXX,XX @@ | ||
176 | '*vcpu-dirty-limit': 'uint64', | ||
177 | '*mode': 'MigMode', | ||
178 | '*multifd-dsa-accel': 'str', | ||
179 | - '*multifd-normal-page-ratio': 'uint8'} } | ||
180 | + '*multifd-normal-page-ratio': 'uint8', | ||
181 | + '*multifd-packet-size': 'uint64'} } | ||
182 | |||
183 | ## | ||
184 | # @query-migrate-parameters: | ||
185 | -- | ||
186 | 2.30.2 | ||
Deleted patch | |||
---|---|---|---|
1 | During live migration, if the latency between the sender and the | ||
2 | receiver is high and the bandwidth is also high (a long, fat pipe), | ||
3 | a bigger packet size can help reduce the total migration time. In | ||
4 | addition, Intel DSA offloading performs better with large batch tasks. | ||
5 | Providing an option to set the packet size is useful for performance tuning. | ||
6 | 1 | ||
7 | Set the option: | ||
8 | migrate_set_parameter multifd-packet-size 512 | ||
9 | |||
10 | Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> | ||
11 | --- | ||
12 | migration/migration-hmp-cmds.c | 7 +++++++ | ||
13 | migration/multifd-zlib.c | 8 ++++++-- | ||
14 | migration/multifd-zstd.c | 8 ++++++-- | ||
15 | migration/multifd.c | 4 ++-- | ||
16 | migration/multifd.h | 3 --- | ||
17 | 5 files changed, 21 insertions(+), 9 deletions(-) | ||
18 | |||
19 | diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/migration/migration-hmp-cmds.c | ||
22 | +++ b/migration/migration-hmp-cmds.c | ||
23 | @@ -XXX,XX +XXX,XX @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) | ||
24 | monitor_printf(mon, "%s: %s\n", | ||
25 | MigrationParameter_str(MIGRATION_PARAMETER_BLOCK_INCREMENTAL), | ||
26 | params->block_incremental ? "on" : "off"); | ||
27 | + monitor_printf(mon, "%s: %" PRIu64 "\n", | ||
28 | + MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_PACKET_SIZE), | ||
29 | + params->multifd_packet_size); | ||
30 | monitor_printf(mon, "%s: %u\n", | ||
31 | MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_CHANNELS), | ||
32 | params->multifd_channels); | ||
33 | @@ -XXX,XX +XXX,XX @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) | ||
34 | p->multifd_dsa_accel->type = QTYPE_QSTRING; | ||
35 | visit_type_str(v, param, &p->multifd_dsa_accel->u.s, &err); | ||
36 | break; | ||
37 | + case MIGRATION_PARAMETER_MULTIFD_PACKET_SIZE: | ||
38 | + p->has_multifd_packet_size = true; | ||
39 | + visit_type_size(v, param, &p->multifd_packet_size, &err); | ||
40 | + break; | ||
41 | case MIGRATION_PARAMETER_MULTIFD_CHANNELS: | ||
42 | p->has_multifd_channels = true; | ||
43 | visit_type_uint8(v, param, &p->multifd_channels, &err); | ||
44 | diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/migration/multifd-zlib.c | ||
47 | +++ b/migration/multifd-zlib.c | ||
48 | @@ -XXX,XX +XXX,XX @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) | ||
49 | struct zlib_data *z = g_new0(struct zlib_data, 1); | ||
50 | z_stream *zs = &z->zs; | ||
51 | const char *err_msg; | ||
52 | + uint64_t multifd_packet_size = | ||
53 | + migrate_multifd_packet_size() * qemu_target_page_size(); | ||
54 | |||
55 | zs->zalloc = Z_NULL; | ||
56 | zs->zfree = Z_NULL; | ||
57 | @@ -XXX,XX +XXX,XX @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) | ||
58 | goto err_free_z; | ||
59 | } | ||
60 | /* This is the maximum size of the compressed buffer */ | ||
61 | - z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE); | ||
62 | + z->zbuff_len = compressBound(multifd_packet_size); | ||
63 | z->zbuff = g_try_malloc(z->zbuff_len); | ||
64 | if (!z->zbuff) { | ||
65 | err_msg = "out of memory for zbuff"; | ||
66 | @@ -XXX,XX +XXX,XX @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) | ||
67 | */ | ||
68 | static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) | ||
69 | { | ||
70 | + uint64_t multifd_packet_size = | ||
71 | + migrate_multifd_packet_size() * qemu_target_page_size(); | ||
72 | struct zlib_data *z = g_new0(struct zlib_data, 1); | ||
73 | z_stream *zs = &z->zs; | ||
74 | |||
75 | @@ -XXX,XX +XXX,XX @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) | ||
76 | return -1; | ||
77 | } | ||
78 | /* To be safe, we reserve twice the size of the packet */ | ||
79 | - z->zbuff_len = MULTIFD_PACKET_SIZE * 2; | ||
80 | + z->zbuff_len = multifd_packet_size * 2; | ||
81 | z->zbuff = g_try_malloc(z->zbuff_len); | ||
82 | if (!z->zbuff) { | ||
83 | inflateEnd(zs); | ||
84 | diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c | ||
85 | index XXXXXXX..XXXXXXX 100644 | ||
86 | --- a/migration/multifd-zstd.c | ||
87 | +++ b/migration/multifd-zstd.c | ||
88 | @@ -XXX,XX +XXX,XX @@ struct zstd_data { | ||
89 | */ | ||
90 | static int zstd_send_setup(MultiFDSendParams *p, Error **errp) | ||
91 | { | ||
92 | + uint64_t multifd_packet_size = | ||
93 | + migrate_multifd_packet_size() * qemu_target_page_size(); | ||
94 | struct zstd_data *z = g_new0(struct zstd_data, 1); | ||
95 | int res; | ||
96 | |||
97 | @@ -XXX,XX +XXX,XX @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) | ||
98 | return -1; | ||
99 | } | ||
100 | /* This is the maximum size of the compressed buffer */ | ||
101 | - z->zbuff_len = ZSTD_compressBound(MULTIFD_PACKET_SIZE); | ||
102 | + z->zbuff_len = ZSTD_compressBound(multifd_packet_size); | ||
103 | z->zbuff = g_try_malloc(z->zbuff_len); | ||
104 | if (!z->zbuff) { | ||
105 | ZSTD_freeCStream(z->zcs); | ||
106 | @@ -XXX,XX +XXX,XX @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) | ||
107 | */ | ||
108 | static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) | ||
109 | { | ||
110 | + uint64_t multifd_packet_size = | ||
111 | + migrate_multifd_packet_size() * qemu_target_page_size(); | ||
112 | struct zstd_data *z = g_new0(struct zstd_data, 1); | ||
113 | int ret; | ||
114 | |||
115 | @@ -XXX,XX +XXX,XX @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) | ||
116 | } | ||
117 | |||
118 | /* To be safe, we reserve twice the size of the packet */ | ||
119 | - z->zbuff_len = MULTIFD_PACKET_SIZE * 2; | ||
120 | + z->zbuff_len = multifd_packet_size * 2; | ||
121 | z->zbuff = g_try_malloc(z->zbuff_len); | ||
122 | if (!z->zbuff) { | ||
123 | ZSTD_freeDStream(z->zds); | ||
124 | diff --git a/migration/multifd.c b/migration/multifd.c | ||
125 | index XXXXXXX..XXXXXXX 100644 | ||
126 | --- a/migration/multifd.c | ||
127 | +++ b/migration/multifd.c | ||
128 | @@ -XXX,XX +XXX,XX @@ static void multifd_new_send_channel_create(gpointer opaque) | ||
129 | int multifd_save_setup(Error **errp) | ||
130 | { | ||
131 | int thread_count; | ||
132 | - uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); | ||
133 | + uint32_t page_count = migrate_multifd_packet_size(); | ||
134 | uint8_t i; | ||
135 | const char *dsa_parameter = migrate_multifd_dsa_accel(); | ||
136 | |||
137 | @@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque) | ||
138 | int multifd_load_setup(Error **errp) | ||
139 | { | ||
140 | int thread_count; | ||
141 | - uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); | ||
142 | + uint32_t page_count = migrate_multifd_packet_size(); | ||
143 | uint8_t i; | ||
144 | const char *dsa_parameter = migrate_multifd_dsa_accel(); | ||
145 | |||
146 | diff --git a/migration/multifd.h b/migration/multifd.h | ||
147 | index XXXXXXX..XXXXXXX 100644 | ||
148 | --- a/migration/multifd.h | ||
149 | +++ b/migration/multifd.h | ||
150 | @@ -XXX,XX +XXX,XX @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); | ||
151 | #define MULTIFD_FLAG_ZLIB (1 << 1) | ||
152 | #define MULTIFD_FLAG_ZSTD (2 << 1) | ||
153 | |||
154 | -/* This value needs to be a multiple of qemu_target_page_size() */ | ||
155 | -#define MULTIFD_PACKET_SIZE (512 * 1024) | ||
156 | - | ||
157 | typedef struct { | ||
158 | uint32_t magic; | ||
159 | uint32_t version; | ||
160 | -- | ||
161 | 2.30.2 | ||