1
The following changes since commit f5fe7c17ac4e309e47e78f0f9761aebc8d2f2c81:
1
The following changes since commit 469e72ab7dbbd7ff4ee601e5ea7c29545d46593b:
2
2
3
Merge tag 'pull-tcg-20230823-2' of https://gitlab.com/rth7680/qemu into staging (2023-08-28 16:07:04 -0400)
3
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging (2020-10-02 16:19:42 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://gitlab.com/hreitz/qemu.git tags/pull-block-2023-09-01
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 380448464dd89291cf7fd7434be6c225482a334d:
9
for you to fetch changes up to 9ab5741164b1727d22f69fe7001382baf0d56977:
10
10
11
tests/file-io-error: New test (2023-08-29 13:01:24 +0200)
11
util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved regions (2020-10-05 10:59:42 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block patches
14
Pull request
15
15
16
- Fix for file-posix's zoning code crashing on I/O errors
16
v2:
17
- Throttling refactoring
17
* Removed clang-format call from scripts/block-coroutine-wrapper.py. This
18
avoids the issue with clang version incompatibility. It could be added back
19
in the future but the code is readable without reformatting and it also
20
makes the build less dependent on the environment.
18
21
19
----------------------------------------------------------------
22
----------------------------------------------------------------
20
Hanna Czenczek (5):
21
file-posix: Clear bs->bl.zoned on error
22
file-posix: Check bs->bl.zoned for zone info
23
file-posix: Fix zone update in I/O error path
24
file-posix: Simplify raw_co_prw's 'out' zone code
25
tests/file-io-error: New test
26
23
27
Zhenwei Pi (9):
24
Eric Auger (2):
28
throttle: introduce enum ThrottleDirection
25
util/vfio-helpers: Collect IOVA reserved regions
29
test-throttle: use enum ThrottleDirection
26
util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved
30
throttle: support read-only and write-only
27
regions
31
test-throttle: test read only and write only
32
cryptodev: use NULL throttle timer cb for read direction
33
throttle: use enum ThrottleDirection instead of bool is_write
34
throttle: use THROTTLE_MAX/ARRAY_SIZE for hard code
35
fsdev: Use ThrottleDirection instread of bool is_write
36
block/throttle-groups: Use ThrottleDirection instread of bool is_write
37
28
38
fsdev/qemu-fsdev-throttle.h | 4 +-
29
Philippe Mathieu-Daudé (6):
39
include/block/throttle-groups.h | 6 +-
30
util/vfio-helpers: Pass page protections to qemu_vfio_pci_map_bar()
40
include/qemu/throttle.h | 16 +-
31
block/nvme: Map doorbells pages write-only
41
backends/cryptodev.c | 12 +-
32
block/nvme: Reduce I/O registers scope
42
block/block-backend.c | 4 +-
33
block/nvme: Drop NVMeRegs structure, directly use NvmeBar
43
block/file-posix.c | 42 +++---
34
block/nvme: Use register definitions from 'block/nvme.h'
44
block/throttle-groups.c | 163 +++++++++++----------
35
block/nvme: Replace magic value by SCALE_MS definition
45
block/throttle.c | 8 +-
36
46
fsdev/qemu-fsdev-throttle.c | 18 ++-
37
Stefano Garzarella (1):
47
hw/9pfs/cofile.c | 4 +-
38
docs: add 'io_uring' option to 'aio' param in qemu-options.hx
48
tests/unit/test-throttle.c | 76 +++++++++-
39
49
util/throttle.c | 84 +++++++----
40
Vladimir Sementsov-Ogievskiy (8):
50
tests/qemu-iotests/tests/file-io-error | 119 +++++++++++++++
41
block: return error-code from bdrv_invalidate_cache
51
tests/qemu-iotests/tests/file-io-error.out | 33 +++++
42
block/io: refactor coroutine wrappers
52
14 files changed, 418 insertions(+), 171 deletions(-)
43
block: declare some coroutine functions in block/coroutines.h
53
create mode 100755 tests/qemu-iotests/tests/file-io-error
44
scripts: add block-coroutine-wrapper.py
54
create mode 100644 tests/qemu-iotests/tests/file-io-error.out
45
block: generate coroutine-wrapper code
46
block: drop bdrv_prwv
47
block/io: refactor save/load vmstate
48
include/block/block.h: drop non-ascii quotation mark
49
50
block/block-gen.h | 49 ++++
51
block/coroutines.h | 65 +++++
52
include/block/block.h | 36 ++-
53
include/qemu/vfio-helpers.h | 2 +-
54
block.c | 97 +------
55
block/io.c | 339 ++++---------------------
56
block/nvme.c | 73 +++---
57
tests/test-bdrv-drain.c | 2 +-
58
util/vfio-helpers.c | 133 +++++++++-
59
block/meson.build | 8 +
60
docs/devel/block-coroutine-wrapper.rst | 54 ++++
61
docs/devel/index.rst | 1 +
62
qemu-options.hx | 10 +-
63
scripts/block-coroutine-wrapper.py | 167 ++++++++++++
64
14 files changed, 608 insertions(+), 428 deletions(-)
65
create mode 100644 block/block-gen.h
66
create mode 100644 block/coroutines.h
67
create mode 100644 docs/devel/block-coroutine-wrapper.rst
68
create mode 100644 scripts/block-coroutine-wrapper.py
55
69
56
--
70
--
57
2.41.0
71
2.26.2
72
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
Pages are currently mapped READ/WRITE. To be able to use different
4
protections, add a new argument to qemu_vfio_pci_map_bar().
5
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-Id: <20200922083821.578519-2-philmd@redhat.com>
9
---
10
include/qemu/vfio-helpers.h | 2 +-
11
block/nvme.c | 3 ++-
12
util/vfio-helpers.c | 4 ++--
13
3 files changed, 5 insertions(+), 4 deletions(-)
14
15
diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/qemu/vfio-helpers.h
18
+++ b/include/qemu/vfio-helpers.h
19
@@ -XXX,XX +XXX,XX @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
20
int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s);
21
void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host);
22
void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
23
- uint64_t offset, uint64_t size,
24
+ uint64_t offset, uint64_t size, int prot,
25
Error **errp);
26
void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar,
27
uint64_t offset, uint64_t size);
28
diff --git a/block/nvme.c b/block/nvme.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/block/nvme.c
31
+++ b/block/nvme.c
32
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
33
goto out;
34
}
35
36
- s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE, errp);
37
+ s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE,
38
+ PROT_READ | PROT_WRITE, errp);
39
if (!s->regs) {
40
ret = -EINVAL;
41
goto out;
42
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/util/vfio-helpers.c
45
+++ b/util/vfio-helpers.c
46
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
47
* Map a PCI bar area.
48
*/
49
void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
50
- uint64_t offset, uint64_t size,
51
+ uint64_t offset, uint64_t size, int prot,
52
Error **errp)
53
{
54
void *p;
55
assert_bar_index_valid(s, index);
56
p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
57
- PROT_READ | PROT_WRITE, MAP_SHARED,
58
+ prot, MAP_SHARED,
59
s->device, s->bar_region_info[index].offset + offset);
60
if (p == MAP_FAILED) {
61
error_setg_errno(errp, errno, "Failed to map BAR region");
62
--
63
2.26.2
64
diff view generated by jsdifflib
1
We must check that zone information is present before running
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
update_zones_wp().
3
2
4
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2234374
3
Per the datasheet sections 3.1.13/3.1.14:
5
Fixes: Coverity CID 1512459
4
"The host should not read the doorbell registers."
6
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
5
7
Message-Id: <20230824155345.109765-4-hreitz@redhat.com>
6
As we don't need read access, map the doorbells with write-only
8
Reviewed-by: Sam Li <faithilikerun@gmail.com>
7
permission. We keep a reference to this mapped address in the
8
BDRVNVMeState structure.
9
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Message-Id: <20200922083821.578519-3-philmd@redhat.com>
9
---
13
---
10
block/file-posix.c | 3 ++-
14
block/nvme.c | 29 +++++++++++++++++++----------
11
1 file changed, 2 insertions(+), 1 deletion(-)
15
1 file changed, 19 insertions(+), 10 deletions(-)
12
16
13
diff --git a/block/file-posix.c b/block/file-posix.c
17
diff --git a/block/nvme.c b/block/nvme.c
14
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
15
--- a/block/file-posix.c
19
--- a/block/nvme.c
16
+++ b/block/file-posix.c
20
+++ b/block/nvme.c
17
@@ -XXX,XX +XXX,XX @@ out:
21
@@ -XXX,XX +XXX,XX @@
18
}
22
#define NVME_SQ_ENTRY_BYTES 64
19
}
23
#define NVME_CQ_ENTRY_BYTES 16
20
} else {
24
#define NVME_QUEUE_SIZE 128
21
- if (type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) {
25
-#define NVME_BAR_SIZE 8192
22
+ if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
26
+#define NVME_DOORBELL_SIZE 4096
23
+ bs->bl.zoned != BLK_Z_NONE) {
27
24
update_zones_wp(bs, s->fd, 0, 1);
28
/*
29
* We have to leave one slot empty as that is the full queue case where
30
@@ -XXX,XX +XXX,XX @@ typedef struct {
31
/* Memory mapped registers */
32
typedef volatile struct {
33
NvmeBar ctrl;
34
- struct {
35
- uint32_t sq_tail;
36
- uint32_t cq_head;
37
- } doorbells[];
38
} NVMeRegs;
39
40
#define INDEX_ADMIN 0
41
@@ -XXX,XX +XXX,XX @@ struct BDRVNVMeState {
42
AioContext *aio_context;
43
QEMUVFIOState *vfio;
44
NVMeRegs *regs;
45
+ /* Memory mapped registers */
46
+ volatile struct {
47
+ uint32_t sq_tail;
48
+ uint32_t cq_head;
49
+ } *doorbells;
50
/* The submission/completion queue pairs.
51
* [0]: admin queue.
52
* [1..]: io queues.
53
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
54
error_propagate(errp, local_err);
55
goto fail;
56
}
57
- q->sq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].sq_tail;
58
+ q->sq.doorbell = &s->doorbells[idx * s->doorbell_scale].sq_tail;
59
60
nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err);
61
if (local_err) {
62
error_propagate(errp, local_err);
63
goto fail;
64
}
65
- q->cq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].cq_head;
66
+ q->cq.doorbell = &s->doorbells[idx * s->doorbell_scale].cq_head;
67
68
return q;
69
fail:
70
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
71
goto out;
72
}
73
74
- s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE,
75
+ s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar),
76
PROT_READ | PROT_WRITE, errp);
77
if (!s->regs) {
78
ret = -EINVAL;
79
goto out;
80
}
81
-
82
/* Perform initialize sequence as described in NVMe spec "7.6.1
83
* Initialization". */
84
85
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
25
}
86
}
26
}
87
}
88
89
+ s->doorbells = qemu_vfio_pci_map_bar(s->vfio, 0, sizeof(NvmeBar),
90
+ NVME_DOORBELL_SIZE, PROT_WRITE, errp);
91
+ if (!s->doorbells) {
92
+ ret = -EINVAL;
93
+ goto out;
94
+ }
95
+
96
/* Set up admin queue. */
97
s->queues = g_new(NVMeQueuePair *, 1);
98
s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0,
99
@@ -XXX,XX +XXX,XX @@ static void nvme_close(BlockDriverState *bs)
100
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
101
false, NULL, NULL);
102
event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
103
- qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, NVME_BAR_SIZE);
104
+ qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells,
105
+ sizeof(NvmeBar), NVME_DOORBELL_SIZE);
106
+ qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, sizeof(NvmeBar));
107
qemu_vfio_close(s->vfio);
108
109
g_free(s->device);
27
--
110
--
28
2.41.0
111
2.26.2
112
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
We only access the I/O register in nvme_init().
4
Remove the reference in BDRVNVMeState and reduce its scope.
5
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-Id: <20200922083821.578519-4-philmd@redhat.com>
9
---
10
block/nvme.c | 29 ++++++++++++++++-------------
11
1 file changed, 16 insertions(+), 13 deletions(-)
12
13
diff --git a/block/nvme.c b/block/nvme.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/block/nvme.c
16
+++ b/block/nvme.c
17
@@ -XXX,XX +XXX,XX @@ enum {
18
struct BDRVNVMeState {
19
AioContext *aio_context;
20
QEMUVFIOState *vfio;
21
- NVMeRegs *regs;
22
/* Memory mapped registers */
23
volatile struct {
24
uint32_t sq_tail;
25
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
26
uint64_t timeout_ms;
27
uint64_t deadline, now;
28
Error *local_err = NULL;
29
+ NVMeRegs *regs;
30
31
qemu_co_mutex_init(&s->dma_map_lock);
32
qemu_co_queue_init(&s->dma_flush_queue);
33
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
34
goto out;
35
}
36
37
- s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar),
38
- PROT_READ | PROT_WRITE, errp);
39
- if (!s->regs) {
40
+ regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar),
41
+ PROT_READ | PROT_WRITE, errp);
42
+ if (!regs) {
43
ret = -EINVAL;
44
goto out;
45
}
46
/* Perform initialize sequence as described in NVMe spec "7.6.1
47
* Initialization". */
48
49
- cap = le64_to_cpu(s->regs->ctrl.cap);
50
+ cap = le64_to_cpu(regs->ctrl.cap);
51
if (!(cap & (1ULL << 37))) {
52
error_setg(errp, "Device doesn't support NVMe command set");
53
ret = -EINVAL;
54
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
55
timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000);
56
57
/* Reset device to get a clean state. */
58
- s->regs->ctrl.cc = cpu_to_le32(le32_to_cpu(s->regs->ctrl.cc) & 0xFE);
59
+ regs->ctrl.cc = cpu_to_le32(le32_to_cpu(regs->ctrl.cc) & 0xFE);
60
/* Wait for CSTS.RDY = 0. */
61
deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
62
- while (le32_to_cpu(s->regs->ctrl.csts) & 0x1) {
63
+ while (le32_to_cpu(regs->ctrl.csts) & 0x1) {
64
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
65
error_setg(errp, "Timeout while waiting for device to reset (%"
66
PRId64 " ms)",
67
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
68
}
69
s->nr_queues = 1;
70
QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
71
- s->regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
72
- s->regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
73
- s->regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
74
+ regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
75
+ regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
76
+ regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
77
78
/* After setting up all control registers we can enable device now. */
79
- s->regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
80
+ regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
81
(ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
82
0x1);
83
/* Wait for CSTS.RDY = 1. */
84
now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
85
deadline = now + timeout_ms * 1000000;
86
- while (!(le32_to_cpu(s->regs->ctrl.csts) & 0x1)) {
87
+ while (!(le32_to_cpu(regs->ctrl.csts) & 0x1)) {
88
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
89
error_setg(errp, "Timeout while waiting for device to start (%"
90
PRId64 " ms)",
91
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
92
ret = -EIO;
93
}
94
out:
95
+ if (regs) {
96
+ qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)regs, 0, sizeof(NvmeBar));
97
+ }
98
+
99
/* Cleaning up is done in nvme_file_open() upon error. */
100
return ret;
101
}
102
@@ -XXX,XX +XXX,XX @@ static void nvme_close(BlockDriverState *bs)
103
event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
104
qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells,
105
sizeof(NvmeBar), NVME_DOORBELL_SIZE);
106
- qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, sizeof(NvmeBar));
107
qemu_vfio_close(s->vfio);
108
109
g_free(s->device);
110
--
111
2.26.2
112
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
NVMeRegs only contains NvmeBar. Simplify the code by using NvmeBar
4
directly.
5
6
This triggers a checkpatch.pl error:
7
8
ERROR: Use of volatile is usually wrong, please add a comment
9
#30: FILE: block/nvme.c:691:
10
+ volatile NvmeBar *regs;
11
12
This is a false positive as in our case we are using I/O registers,
13
so the 'volatile' use is justified.
14
15
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Message-Id: <20200922083821.578519-5-philmd@redhat.com>
18
---
19
block/nvme.c | 23 +++++++++--------------
20
1 file changed, 9 insertions(+), 14 deletions(-)
21
22
diff --git a/block/nvme.c b/block/nvme.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/block/nvme.c
25
+++ b/block/nvme.c
26
@@ -XXX,XX +XXX,XX @@ typedef struct {
27
QEMUBH *completion_bh;
28
} NVMeQueuePair;
29
30
-/* Memory mapped registers */
31
-typedef volatile struct {
32
- NvmeBar ctrl;
33
-} NVMeRegs;
34
-
35
#define INDEX_ADMIN 0
36
#define INDEX_IO(n) (1 + n)
37
38
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
39
uint64_t timeout_ms;
40
uint64_t deadline, now;
41
Error *local_err = NULL;
42
- NVMeRegs *regs;
43
+ volatile NvmeBar *regs = NULL;
44
45
qemu_co_mutex_init(&s->dma_map_lock);
46
qemu_co_queue_init(&s->dma_flush_queue);
47
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
48
/* Perform initialize sequence as described in NVMe spec "7.6.1
49
* Initialization". */
50
51
- cap = le64_to_cpu(regs->ctrl.cap);
52
+ cap = le64_to_cpu(regs->cap);
53
if (!(cap & (1ULL << 37))) {
54
error_setg(errp, "Device doesn't support NVMe command set");
55
ret = -EINVAL;
56
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
57
timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000);
58
59
/* Reset device to get a clean state. */
60
- regs->ctrl.cc = cpu_to_le32(le32_to_cpu(regs->ctrl.cc) & 0xFE);
61
+ regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
62
/* Wait for CSTS.RDY = 0. */
63
deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
64
- while (le32_to_cpu(regs->ctrl.csts) & 0x1) {
65
+ while (le32_to_cpu(regs->csts) & 0x1) {
66
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
67
error_setg(errp, "Timeout while waiting for device to reset (%"
68
PRId64 " ms)",
69
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
70
}
71
s->nr_queues = 1;
72
QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
73
- regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
74
- regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
75
- regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
76
+ regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
77
+ regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
78
+ regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
79
80
/* After setting up all control registers we can enable device now. */
81
- regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
82
+ regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
83
(ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
84
0x1);
85
/* Wait for CSTS.RDY = 1. */
86
now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
87
deadline = now + timeout_ms * 1000000;
88
- while (!(le32_to_cpu(regs->ctrl.csts) & 0x1)) {
89
+ while (!(le32_to_cpu(regs->csts) & 0x1)) {
90
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
91
error_setg(errp, "Timeout while waiting for device to start (%"
92
PRId64 " ms)",
93
--
94
2.26.2
95
diff view generated by jsdifflib
1
Instead of checking bs->wps or bs->bl.zone_size for whether zone
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
information is present, check bs->bl.zoned. That is the flag that
3
raw_refresh_zoned_limits() reliably sets to indicate zone support. If
4
it is set to something other than BLK_Z_NONE, other values and objects
5
like bs->wps and bs->bl.zone_size must be non-null/zero and valid; if it
6
is not, we cannot rely on their validity.
7
2
8
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
3
Use the NVMe register definitions from "block/nvme.h" which
9
Message-Id: <20230824155345.109765-3-hreitz@redhat.com>
4
ease a bit reviewing the code while matching the datasheet.
10
Reviewed-by: Sam Li <faithilikerun@gmail.com>
5
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-Id: <20200922083821.578519-6-philmd@redhat.com>
11
---
9
---
12
block/file-posix.c | 12 +++++++-----
10
block/nvme.c | 21 +++++++++++----------
13
1 file changed, 7 insertions(+), 5 deletions(-)
11
1 file changed, 11 insertions(+), 10 deletions(-)
14
12
15
diff --git a/block/file-posix.c b/block/file-posix.c
13
diff --git a/block/nvme.c b/block/nvme.c
16
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
17
--- a/block/file-posix.c
15
--- a/block/nvme.c
18
+++ b/block/file-posix.c
16
+++ b/block/nvme.c
19
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
17
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
20
if (fd_open(bs) < 0)
18
* Initialization". */
21
return -EIO;
19
22
#if defined(CONFIG_BLKZONED)
20
cap = le64_to_cpu(regs->cap);
23
- if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && bs->wps) {
21
- if (!(cap & (1ULL << 37))) {
24
+ if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
22
+ if (!NVME_CAP_CSS(cap)) {
25
+ bs->bl.zoned != BLK_Z_NONE) {
23
error_setg(errp, "Device doesn't support NVMe command set");
26
qemu_co_mutex_lock(&bs->wps->colock);
24
ret = -EINVAL;
27
- if (type & QEMU_AIO_ZONE_APPEND && bs->bl.zone_size) {
25
goto out;
28
+ if (type & QEMU_AIO_ZONE_APPEND) {
29
int index = offset / bs->bl.zone_size;
30
offset = bs->wps->wp[index];
31
}
32
@@ -XXX,XX +XXX,XX @@ out:
33
{
34
BlockZoneWps *wps = bs->wps;
35
if (ret == 0) {
36
- if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))
37
- && wps && bs->bl.zone_size) {
38
+ if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
39
+ bs->bl.zoned != BLK_Z_NONE) {
40
uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
41
if (!BDRV_ZT_IS_CONV(*wp)) {
42
if (type & QEMU_AIO_ZONE_APPEND) {
43
@@ -XXX,XX +XXX,XX @@ out:
44
}
45
}
26
}
46
27
47
- if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && wps) {
28
- s->page_size = MAX(4096, 1 << (12 + ((cap >> 48) & 0xF)));
48
+ if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
29
- s->doorbell_scale = (4 << (((cap >> 32) & 0xF))) / sizeof(uint32_t);
49
+ bs->bl.zoned != BLK_Z_NONE) {
30
+ s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap));
50
qemu_co_mutex_unlock(&wps->colock);
31
+ s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t);
32
bs->bl.opt_mem_alignment = s->page_size;
33
- timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000);
34
+ timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000);
35
36
/* Reset device to get a clean state. */
37
regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
38
/* Wait for CSTS.RDY = 0. */
39
deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
40
- while (le32_to_cpu(regs->csts) & 0x1) {
41
+ while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
42
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
43
error_setg(errp, "Timeout while waiting for device to reset (%"
44
PRId64 " ms)",
45
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
51
}
46
}
52
}
47
s->nr_queues = 1;
48
QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
49
- regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
50
+ regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) |
51
+ (NVME_QUEUE_SIZE << AQA_ASQS_SHIFT));
52
regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
53
regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
54
55
/* After setting up all control registers we can enable device now. */
56
- regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
57
- (ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
58
- 0x1);
59
+ regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
60
+ (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) |
61
+ CC_EN_MASK);
62
/* Wait for CSTS.RDY = 1. */
63
now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
64
deadline = now + timeout_ms * 1000000;
65
- while (!(le32_to_cpu(regs->csts) & 0x1)) {
66
+ while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
67
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
68
error_setg(errp, "Timeout while waiting for device to start (%"
69
PRId64 " ms)",
53
--
70
--
54
2.41.0
71
2.26.2
72
diff view generated by jsdifflib
1
bs->bl.zoned is what indicates whether the zone information is present
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
and valid; it is the only thing that raw_refresh_zoned_limits() sets if
3
CONFIG_BLKZONED is not defined, and it is also the only thing that it
4
sets if CONFIG_BLKZONED is defined, but there are no zones.
5
2
6
Make sure that it is always set to BLK_Z_NONE if there is an error
3
Use self-explicit SCALE_MS definition instead of magic value
7
anywhere in raw_refresh_zoned_limits() so that we do not accidentally
4
(missed in similar commit e4f310fe7f5).
8
announce zones while our information is incomplete or invalid.
9
5
10
This also fixes a memory leak in the last error path in
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
raw_refresh_zoned_limits().
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-Id: <20200922083821.578519-7-philmd@redhat.com>
9
---
10
block/nvme.c | 2 +-
11
1 file changed, 1 insertion(+), 1 deletion(-)
12
12
13
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
13
diff --git a/block/nvme.c b/block/nvme.c
14
Message-Id: <20230824155345.109765-2-hreitz@redhat.com>
14
index XXXXXXX..XXXXXXX 100644
15
Reviewed-by: Sam Li <faithilikerun@gmail.com>
15
--- a/block/nvme.c
16
---
16
+++ b/block/nvme.c
17
block/file-posix.c | 21 ++++++++++++---------
17
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
18
1 file changed, 12 insertions(+), 9 deletions(-)
18
CC_EN_MASK);
19
/* Wait for CSTS.RDY = 1. */
20
now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
21
- deadline = now + timeout_ms * 1000000;
22
+ deadline = now + timeout_ms * SCALE_MS;
23
while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
24
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
25
error_setg(errp, "Timeout while waiting for device to start (%"
26
--
27
2.26.2
19
28
20
diff --git a/block/file-posix.c b/block/file-posix.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/block/file-posix.c
23
+++ b/block/file-posix.c
24
@@ -XXX,XX +XXX,XX @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
25
BlockZoneModel zoned;
26
int ret;
27
28
- bs->bl.zoned = BLK_Z_NONE;
29
-
30
ret = get_sysfs_zoned_model(st, &zoned);
31
if (ret < 0 || zoned == BLK_Z_NONE) {
32
- return;
33
+ goto no_zoned;
34
}
35
bs->bl.zoned = zoned;
36
37
@@ -XXX,XX +XXX,XX @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
38
if (ret < 0) {
39
error_setg_errno(errp, -ret, "Unable to read chunk_sectors "
40
"sysfs attribute");
41
- return;
42
+ goto no_zoned;
43
} else if (!ret) {
44
error_setg(errp, "Read 0 from chunk_sectors sysfs attribute");
45
- return;
46
+ goto no_zoned;
47
}
48
bs->bl.zone_size = ret << BDRV_SECTOR_BITS;
49
50
@@ -XXX,XX +XXX,XX @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
51
if (ret < 0) {
52
error_setg_errno(errp, -ret, "Unable to read nr_zones "
53
"sysfs attribute");
54
- return;
55
+ goto no_zoned;
56
} else if (!ret) {
57
error_setg(errp, "Read 0 from nr_zones sysfs attribute");
58
- return;
59
+ goto no_zoned;
60
}
61
bs->bl.nr_zones = ret;
62
63
@@ -XXX,XX +XXX,XX @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
64
ret = get_zones_wp(bs, s->fd, 0, bs->bl.nr_zones, 0);
65
if (ret < 0) {
66
error_setg_errno(errp, -ret, "report wps failed");
67
- bs->wps = NULL;
68
- return;
69
+ goto no_zoned;
70
}
71
qemu_co_mutex_init(&bs->wps->colock);
72
+ return;
73
+
74
+no_zoned:
75
+ bs->bl.zoned = BLK_Z_NONE;
76
+ g_free(bs->wps);
77
+ bs->wps = NULL;
78
}
79
#else /* !defined(CONFIG_BLKZONED) */
80
static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
81
--
82
2.41.0
diff view generated by jsdifflib
1
From: zhenwei pi <pizhenwei@bytedance.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
'bool is_write' style is obsolete from throttle framework, adapt
3
This is the only coroutine wrapper from block.c and block/io.c which
4
block throttle groups to the new style:
4
doesn't return a value, so let's convert it to the common behavior, to
5
- use ThrottleDirection instead of 'bool is_write'. Ex,
5
simplify moving to generated coroutine wrappers in a further commit.
6
schedule_next_request(ThrottleGroupMember *tgm, bool is_write)
7
-> schedule_next_request(ThrottleGroupMember *tgm, ThrottleDirection direction)
8
6
9
- use THROTTLE_MAX instead of hard code. Ex, ThrottleGroupMember *tokens[2]
7
Also, bdrv_invalidate_cache is a void function, returning error only
10
-> ThrottleGroupMember *tokens[THROTTLE_MAX]
8
through **errp parameter, which is considered to be bad practice, as
9
it forces callers to define and propagate local_err variable, so
10
conversion is good anyway.
11
11
12
- use ThrottleDirection instead of hard code on iteration. Ex, (i = 0; i < 2; i++)
12
This patch leaves the conversion of .bdrv_co_invalidate_cache() driver
13
-> for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++)
13
callbacks and bdrv_invalidate_cache_all() for another day.
14
14
15
Use a simple python script to test the new style:
15
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
16
#!/usr/bin/python3
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
import subprocess
17
Reviewed-by: Eric Blake <eblake@redhat.com>
18
import random
18
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
19
import time
19
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
20
Message-Id: <20200924185414.28642-2-vsementsov@virtuozzo.com>
21
---
22
include/block/block.h | 2 +-
23
block.c | 32 ++++++++++++++++++--------------
24
2 files changed, 19 insertions(+), 15 deletions(-)
20
25
21
commands = ['virsh blkdeviotune jammy vda --write-bytes-sec ', \
26
diff --git a/include/block/block.h b/include/block/block.h
22
'virsh blkdeviotune jammy vda --write-iops-sec ', \
23
'virsh blkdeviotune jammy vda --read-bytes-sec ', \
24
'virsh blkdeviotune jammy vda --read-iops-sec ']
25
26
for loop in range(1, 1000):
27
time.sleep(random.randrange(3, 5))
28
command = commands[random.randrange(0, 3)] + str(random.randrange(0, 1000000))
29
subprocess.run(command, shell=True, check=True)
30
31
This works fine.
32
33
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
34
Message-Id: <20230728022006.1098509-10-pizhenwei@bytedance.com>
35
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
36
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
37
---
38
include/block/throttle-groups.h | 6 +-
39
block/block-backend.c | 4 +-
40
block/throttle-groups.c | 161 ++++++++++++++++----------------
41
block/throttle.c | 8 +-
42
4 files changed, 90 insertions(+), 89 deletions(-)
43
44
diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h
45
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
46
--- a/include/block/throttle-groups.h
28
--- a/include/block/block.h
47
+++ b/include/block/throttle-groups.h
29
+++ b/include/block/block.h
48
@@ -XXX,XX +XXX,XX @@ typedef struct ThrottleGroupMember {
30
@@ -XXX,XX +XXX,XX @@ void bdrv_aio_cancel_async(BlockAIOCB *acb);
49
AioContext *aio_context;
31
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
50
/* throttled_reqs_lock protects the CoQueues for throttled requests. */
32
51
CoMutex throttled_reqs_lock;
33
/* Invalidate any cached metadata used by image formats */
52
- CoQueue throttled_reqs[2];
34
-void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
53
+ CoQueue throttled_reqs[THROTTLE_MAX];
35
+int bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
54
36
void bdrv_invalidate_cache_all(Error **errp);
55
/* Nonzero if the I/O limits are currently being ignored; generally
37
int bdrv_inactivate_all(void);
56
* it is zero. Accessed with atomic operations.
38
57
@@ -XXX,XX +XXX,XX @@ typedef struct ThrottleGroupMember {
39
diff --git a/block.c b/block.c
58
* throttle_state tells us if I/O limits are configured. */
59
ThrottleState *throttle_state;
60
ThrottleTimers throttle_timers;
61
- unsigned pending_reqs[2];
62
+ unsigned pending_reqs[THROTTLE_MAX];
63
QLIST_ENTRY(ThrottleGroupMember) round_robin;
64
65
} ThrottleGroupMember;
66
@@ -XXX,XX +XXX,XX @@ void throttle_group_restart_tgm(ThrottleGroupMember *tgm);
67
68
void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm,
69
int64_t bytes,
70
- bool is_write);
71
+ ThrottleDirection direction);
72
void throttle_group_attach_aio_context(ThrottleGroupMember *tgm,
73
AioContext *new_context);
74
void throttle_group_detach_aio_context(ThrottleGroupMember *tgm);
75
diff --git a/block/block-backend.c b/block/block-backend.c
76
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
77
--- a/block/block-backend.c
41
--- a/block.c
78
+++ b/block/block-backend.c
42
+++ b/block.c
79
@@ -XXX,XX +XXX,XX @@ blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes,
43
@@ -XXX,XX +XXX,XX @@ void bdrv_init_with_whitelist(void)
80
/* throttling disk I/O */
44
bdrv_init();
81
if (blk->public.throttle_group_member.throttle_state) {
45
}
82
throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
46
83
- bytes, false);
47
-static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
84
+ bytes, THROTTLE_READ);
48
- Error **errp)
49
+static int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
50
+ Error **errp)
51
{
52
BdrvChild *child, *parent;
53
uint64_t perm, shared_perm;
54
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
55
BdrvDirtyBitmap *bm;
56
57
if (!bs->drv) {
58
- return;
59
+ return -ENOMEDIUM;
85
}
60
}
86
61
87
ret = bdrv_co_preadv_part(blk->root, offset, bytes, qiov, qiov_offset,
62
QLIST_FOREACH(child, &bs->children, next) {
88
@@ -XXX,XX +XXX,XX @@ blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
63
bdrv_co_invalidate_cache(child->bs, &local_err);
89
/* throttling disk I/O */
64
if (local_err) {
90
if (blk->public.throttle_group_member.throttle_state) {
65
error_propagate(errp, local_err);
91
throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
66
- return;
92
- bytes, true);
67
+ return -EINVAL;
93
+ bytes, THROTTLE_WRITE);
68
}
94
}
69
}
95
70
96
if (!blk->enable_write_cache) {
71
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
97
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
72
ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, errp);
98
index XXXXXXX..XXXXXXX 100644
73
if (ret < 0) {
99
--- a/block/throttle-groups.c
74
bs->open_flags |= BDRV_O_INACTIVE;
100
+++ b/block/throttle-groups.c
75
- return;
101
@@ -XXX,XX +XXX,XX @@
76
+ return ret;
102
77
}
103
static void throttle_group_obj_init(Object *obj);
78
bdrv_set_perm(bs, perm, shared_perm);
104
static void throttle_group_obj_complete(UserCreatable *obj, Error **errp);
79
105
-static void timer_cb(ThrottleGroupMember *tgm, bool is_write);
80
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
106
+static void timer_cb(ThrottleGroupMember *tgm, ThrottleDirection direction);
81
if (local_err) {
107
82
bs->open_flags |= BDRV_O_INACTIVE;
108
/* The ThrottleGroup structure (with its ThrottleState) is shared
83
error_propagate(errp, local_err);
109
* among different ThrottleGroupMembers and it's independent from
84
- return;
110
@@ -XXX,XX +XXX,XX @@ struct ThrottleGroup {
85
+ return -EINVAL;
111
QemuMutex lock; /* This lock protects the following four fields */
86
}
112
ThrottleState ts;
87
}
113
QLIST_HEAD(, ThrottleGroupMember) head;
88
114
- ThrottleGroupMember *tokens[2];
89
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
115
- bool any_timer_armed[2];
90
if (ret < 0) {
116
+ ThrottleGroupMember *tokens[THROTTLE_MAX];
91
bs->open_flags |= BDRV_O_INACTIVE;
117
+ bool any_timer_armed[THROTTLE_MAX];
92
error_setg_errno(errp, -ret, "Could not refresh total sector count");
118
QEMUClockType clock_type;
93
- return;
119
94
+ return ret;
120
/* This field is protected by the global QEMU mutex */
95
}
121
@@ -XXX,XX +XXX,XX @@ static ThrottleGroupMember *throttle_group_next_tgm(ThrottleGroupMember *tgm)
122
* This assumes that tg->lock is held.
123
*
124
* @tgm: the ThrottleGroupMember
125
- * @is_write: the type of operation (read/write)
126
+ * @direction: the ThrottleDirection
127
* @ret: whether the ThrottleGroupMember has pending requests.
128
*/
129
static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm,
130
- bool is_write)
131
+ ThrottleDirection direction)
132
{
133
- return tgm->pending_reqs[is_write];
134
+ return tgm->pending_reqs[direction];
135
}
136
137
/* Return the next ThrottleGroupMember in the round-robin sequence with pending
138
@@ -XXX,XX +XXX,XX @@ static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm,
139
* This assumes that tg->lock is held.
140
*
141
* @tgm: the current ThrottleGroupMember
142
- * @is_write: the type of operation (read/write)
143
+ * @direction: the ThrottleDirection
144
* @ret: the next ThrottleGroupMember with pending requests, or tgm if
145
* there is none.
146
*/
147
static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
148
- bool is_write)
149
+ ThrottleDirection direction)
150
{
151
ThrottleState *ts = tgm->throttle_state;
152
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
153
@@ -XXX,XX +XXX,XX @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
154
* it's being drained. Skip the round-robin search and return tgm
155
* immediately if it has pending requests. Otherwise we could be
156
* forcing it to wait for other member's throttled requests. */
157
- if (tgm_has_pending_reqs(tgm, is_write) &&
158
+ if (tgm_has_pending_reqs(tgm, direction) &&
159
qatomic_read(&tgm->io_limits_disabled)) {
160
return tgm;
161
}
96
}
162
97
163
- start = token = tg->tokens[is_write];
98
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
164
+ start = token = tg->tokens[direction];
99
if (local_err) {
165
100
bs->open_flags |= BDRV_O_INACTIVE;
166
/* get next bs round in round robin style */
101
error_propagate(errp, local_err);
167
token = throttle_group_next_tgm(token);
102
- return;
168
- while (token != start && !tgm_has_pending_reqs(token, is_write)) {
103
+ return -EINVAL;
169
+ while (token != start && !tgm_has_pending_reqs(token, direction)) {
170
token = throttle_group_next_tgm(token);
171
}
172
173
@@ -XXX,XX +XXX,XX @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
174
* then decide the token is the current tgm because chances are
175
* the current tgm got the current request queued.
176
*/
177
- if (token == start && !tgm_has_pending_reqs(token, is_write)) {
178
+ if (token == start && !tgm_has_pending_reqs(token, direction)) {
179
token = tgm;
180
}
181
182
/* Either we return the original TGM, or one with pending requests */
183
- assert(token == tgm || tgm_has_pending_reqs(token, is_write));
184
+ assert(token == tgm || tgm_has_pending_reqs(token, direction));
185
186
return token;
187
}
188
@@ -XXX,XX +XXX,XX @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
189
* This assumes that tg->lock is held.
190
*
191
* @tgm: the current ThrottleGroupMember
192
- * @is_write: the type of operation (read/write)
193
+ * @direction: the ThrottleDirection
194
* @ret: whether the I/O request needs to be throttled or not
195
*/
196
static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
197
- bool is_write)
198
+ ThrottleDirection direction)
199
{
200
ThrottleState *ts = tgm->throttle_state;
201
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
202
ThrottleTimers *tt = &tgm->throttle_timers;
203
- ThrottleDirection direction = is_write ? THROTTLE_WRITE : THROTTLE_READ;
204
bool must_wait;
205
206
if (qatomic_read(&tgm->io_limits_disabled)) {
207
@@ -XXX,XX +XXX,XX @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
208
}
209
210
/* Check if any of the timers in this group is already armed */
211
- if (tg->any_timer_armed[is_write]) {
212
+ if (tg->any_timer_armed[direction]) {
213
return true;
214
}
215
216
@@ -XXX,XX +XXX,XX @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
217
218
/* If a timer just got armed, set tgm as the current token */
219
if (must_wait) {
220
- tg->tokens[is_write] = tgm;
221
- tg->any_timer_armed[is_write] = true;
222
+ tg->tokens[direction] = tgm;
223
+ tg->any_timer_armed[direction] = true;
224
}
225
226
return must_wait;
227
@@ -XXX,XX +XXX,XX @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
228
* any request was actually pending.
229
*
230
* @tgm: the current ThrottleGroupMember
231
- * @is_write: the type of operation (read/write)
232
+ * @direction: the ThrottleDirection
233
*/
234
static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tgm,
235
- bool is_write)
236
+ ThrottleDirection direction)
237
{
238
bool ret;
239
240
qemu_co_mutex_lock(&tgm->throttled_reqs_lock);
241
- ret = qemu_co_queue_next(&tgm->throttled_reqs[is_write]);
242
+ ret = qemu_co_queue_next(&tgm->throttled_reqs[direction]);
243
qemu_co_mutex_unlock(&tgm->throttled_reqs_lock);
244
245
return ret;
246
@@ -XXX,XX +XXX,XX @@ static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tg
247
* This assumes that tg->lock is held.
248
*
249
* @tgm: the current ThrottleGroupMember
250
- * @is_write: the type of operation (read/write)
251
+ * @direction: the ThrottleDirection
252
*/
253
-static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write)
254
+static void schedule_next_request(ThrottleGroupMember *tgm,
255
+ ThrottleDirection direction)
256
{
257
ThrottleState *ts = tgm->throttle_state;
258
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
259
@@ -XXX,XX +XXX,XX @@ static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write)
260
ThrottleGroupMember *token;
261
262
/* Check if there's any pending request to schedule next */
263
- token = next_throttle_token(tgm, is_write);
264
- if (!tgm_has_pending_reqs(token, is_write)) {
265
+ token = next_throttle_token(tgm, direction);
266
+ if (!tgm_has_pending_reqs(token, direction)) {
267
return;
268
}
269
270
/* Set a timer for the request if it needs to be throttled */
271
- must_wait = throttle_group_schedule_timer(token, is_write);
272
+ must_wait = throttle_group_schedule_timer(token, direction);
273
274
/* If it doesn't have to wait, queue it for immediate execution */
275
if (!must_wait) {
276
/* Give preference to requests from the current tgm */
277
if (qemu_in_coroutine() &&
278
- throttle_group_co_restart_queue(tgm, is_write)) {
279
+ throttle_group_co_restart_queue(tgm, direction)) {
280
token = tgm;
281
} else {
282
ThrottleTimers *tt = &token->throttle_timers;
283
int64_t now = qemu_clock_get_ns(tg->clock_type);
284
- timer_mod(tt->timers[is_write], now);
285
- tg->any_timer_armed[is_write] = true;
286
+ timer_mod(tt->timers[direction], now);
287
+ tg->any_timer_armed[direction] = true;
288
}
289
- tg->tokens[is_write] = token;
290
+ tg->tokens[direction] = token;
291
}
292
}
293
294
@@ -XXX,XX +XXX,XX @@ static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write)
295
*
296
* @tgm: the current ThrottleGroupMember
297
* @bytes: the number of bytes for this I/O
298
- * @is_write: the type of operation (read/write)
299
+ * @direction: the ThrottleDirection
300
*/
301
void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm,
302
int64_t bytes,
303
- bool is_write)
304
+ ThrottleDirection direction)
305
{
306
bool must_wait;
307
ThrottleGroupMember *token;
308
ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts);
309
- ThrottleDirection direction = is_write ? THROTTLE_WRITE : THROTTLE_READ;
310
311
assert(bytes >= 0);
312
+ assert(direction < THROTTLE_MAX);
313
314
qemu_mutex_lock(&tg->lock);
315
316
/* First we check if this I/O has to be throttled. */
317
- token = next_throttle_token(tgm, is_write);
318
- must_wait = throttle_group_schedule_timer(token, is_write);
319
+ token = next_throttle_token(tgm, direction);
320
+ must_wait = throttle_group_schedule_timer(token, direction);
321
322
/* Wait if there's a timer set or queued requests of this type */
323
- if (must_wait || tgm->pending_reqs[is_write]) {
324
- tgm->pending_reqs[is_write]++;
325
+ if (must_wait || tgm->pending_reqs[direction]) {
326
+ tgm->pending_reqs[direction]++;
327
qemu_mutex_unlock(&tg->lock);
328
qemu_co_mutex_lock(&tgm->throttled_reqs_lock);
329
- qemu_co_queue_wait(&tgm->throttled_reqs[is_write],
330
+ qemu_co_queue_wait(&tgm->throttled_reqs[direction],
331
&tgm->throttled_reqs_lock);
332
qemu_co_mutex_unlock(&tgm->throttled_reqs_lock);
333
qemu_mutex_lock(&tg->lock);
334
- tgm->pending_reqs[is_write]--;
335
+ tgm->pending_reqs[direction]--;
336
}
337
338
/* The I/O will be executed, so do the accounting */
339
throttle_account(tgm->throttle_state, direction, bytes);
340
341
/* Schedule the next request */
342
- schedule_next_request(tgm, is_write);
343
+ schedule_next_request(tgm, direction);
344
345
qemu_mutex_unlock(&tg->lock);
346
}
347
348
typedef struct {
349
ThrottleGroupMember *tgm;
350
- bool is_write;
351
+ ThrottleDirection direction;
352
} RestartData;
353
354
static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
355
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
356
ThrottleGroupMember *tgm = data->tgm;
357
ThrottleState *ts = tgm->throttle_state;
358
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
359
- bool is_write = data->is_write;
360
+ ThrottleDirection direction = data->direction;
361
bool empty_queue;
362
363
- empty_queue = !throttle_group_co_restart_queue(tgm, is_write);
364
+ empty_queue = !throttle_group_co_restart_queue(tgm, direction);
365
366
/* If the request queue was empty then we have to take care of
367
* scheduling the next one */
368
if (empty_queue) {
369
qemu_mutex_lock(&tg->lock);
370
- schedule_next_request(tgm, is_write);
371
+ schedule_next_request(tgm, direction);
372
qemu_mutex_unlock(&tg->lock);
373
}
374
375
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
376
aio_wait_kick();
377
}
378
379
-static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write)
380
+static void throttle_group_restart_queue(ThrottleGroupMember *tgm,
381
+ ThrottleDirection direction)
382
{
383
Coroutine *co;
384
RestartData *rd = g_new0(RestartData, 1);
385
386
rd->tgm = tgm;
387
- rd->is_write = is_write;
388
+ rd->direction = direction;
389
390
/* This function is called when a timer is fired or when
391
* throttle_group_restart_tgm() is called. Either way, there can
392
* be no timer pending on this tgm at this point */
393
- assert(!timer_pending(tgm->throttle_timers.timers[is_write]));
394
+ assert(!timer_pending(tgm->throttle_timers.timers[direction]));
395
396
qatomic_inc(&tgm->restart_pending);
397
398
@@ -XXX,XX +XXX,XX @@ static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write
399
400
void throttle_group_restart_tgm(ThrottleGroupMember *tgm)
401
{
402
- int i;
403
+ ThrottleDirection dir;
404
405
if (tgm->throttle_state) {
406
- for (i = 0; i < 2; i++) {
407
- QEMUTimer *t = tgm->throttle_timers.timers[i];
408
+ for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
409
+ QEMUTimer *t = tgm->throttle_timers.timers[dir];
410
if (timer_pending(t)) {
411
/* If there's a pending timer on this tgm, fire it now */
412
timer_del(t);
413
- timer_cb(tgm, i);
414
+ timer_cb(tgm, dir);
415
} else {
416
/* Else run the next request from the queue manually */
417
- throttle_group_restart_queue(tgm, i);
418
+ throttle_group_restart_queue(tgm, dir);
419
}
104
}
420
}
105
}
421
}
106
}
422
@@ -XXX,XX +XXX,XX @@ void throttle_group_get_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg)
107
+
423
* because it had been throttled.
108
+ return 0;
424
*
109
}
425
* @tgm: the ThrottleGroupMember whose request had been throttled
110
426
- * @is_write: the type of operation (read/write)
111
typedef struct InvalidateCacheCo {
427
+ * @direction: the ThrottleDirection
112
BlockDriverState *bs;
428
*/
113
Error **errp;
429
-static void timer_cb(ThrottleGroupMember *tgm, bool is_write)
114
bool done;
430
+static void timer_cb(ThrottleGroupMember *tgm, ThrottleDirection direction)
115
+ int ret;
116
} InvalidateCacheCo;
117
118
static void coroutine_fn bdrv_invalidate_cache_co_entry(void *opaque)
431
{
119
{
432
ThrottleState *ts = tgm->throttle_state;
120
InvalidateCacheCo *ico = opaque;
433
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
121
- bdrv_co_invalidate_cache(ico->bs, ico->errp);
434
122
+ ico->ret = bdrv_co_invalidate_cache(ico->bs, ico->errp);
435
/* The timer has just been fired, so we can update the flag */
123
ico->done = true;
436
qemu_mutex_lock(&tg->lock);
124
aio_wait_kick();
437
- tg->any_timer_armed[is_write] = false;
438
+ tg->any_timer_armed[direction] = false;
439
qemu_mutex_unlock(&tg->lock);
440
441
/* Run the request that was waiting for this timer */
442
- throttle_group_restart_queue(tgm, is_write);
443
+ throttle_group_restart_queue(tgm, direction);
444
}
125
}
445
126
446
static void read_timer_cb(void *opaque)
127
-void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
128
+int bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
447
{
129
{
448
- timer_cb(opaque, false);
130
Coroutine *co;
449
+ timer_cb(opaque, THROTTLE_READ);
131
InvalidateCacheCo ico = {
132
@@ -XXX,XX +XXX,XX @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
133
bdrv_coroutine_enter(bs, co);
134
BDRV_POLL_WHILE(bs, !ico.done);
135
}
136
+
137
+ return ico.ret;
450
}
138
}
451
139
452
static void write_timer_cb(void *opaque)
140
void bdrv_invalidate_cache_all(Error **errp)
453
{
141
{
454
- timer_cb(opaque, true);
142
BlockDriverState *bs;
455
+ timer_cb(opaque, THROTTLE_WRITE);
143
- Error *local_err = NULL;
456
}
144
BdrvNextIterator it;
457
145
458
/* Register a ThrottleGroupMember from the throttling group, also initializing
146
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
459
@@ -XXX,XX +XXX,XX @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
147
AioContext *aio_context = bdrv_get_aio_context(bs);
460
const char *groupname,
148
+ int ret;
461
AioContext *ctx)
149
462
{
150
aio_context_acquire(aio_context);
463
- int i;
151
- bdrv_invalidate_cache(bs, &local_err);
464
+ ThrottleDirection dir;
152
+ ret = bdrv_invalidate_cache(bs, errp);
465
ThrottleState *ts = throttle_group_incref(groupname);
153
aio_context_release(aio_context);
466
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
154
- if (local_err) {
467
155
- error_propagate(errp, local_err);
468
@@ -XXX,XX +XXX,XX @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
156
+ if (ret < 0) {
469
157
bdrv_next_cleanup(&it);
470
QEMU_LOCK_GUARD(&tg->lock);
158
return;
471
/* If the ThrottleGroup is new set this ThrottleGroupMember as the token */
472
- for (i = 0; i < 2; i++) {
473
- if (!tg->tokens[i]) {
474
- tg->tokens[i] = tgm;
475
+ for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
476
+ if (!tg->tokens[dir]) {
477
+ tg->tokens[dir] = tgm;
478
}
159
}
479
+ qemu_co_queue_init(&tgm->throttled_reqs[dir]);
480
}
481
482
QLIST_INSERT_HEAD(&tg->head, tgm, round_robin);
483
@@ -XXX,XX +XXX,XX @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
484
write_timer_cb,
485
tgm);
486
qemu_co_mutex_init(&tgm->throttled_reqs_lock);
487
- qemu_co_queue_init(&tgm->throttled_reqs[0]);
488
- qemu_co_queue_init(&tgm->throttled_reqs[1]);
489
}
490
491
/* Unregister a ThrottleGroupMember from its group, removing it from the list,
492
@@ -XXX,XX +XXX,XX @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm)
493
ThrottleState *ts = tgm->throttle_state;
494
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
495
ThrottleGroupMember *token;
496
- int i;
497
+ ThrottleDirection dir;
498
499
if (!ts) {
500
/* Discard already unregistered tgm */
501
@@ -XXX,XX +XXX,XX @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm)
502
AIO_WAIT_WHILE(tgm->aio_context, qatomic_read(&tgm->restart_pending) > 0);
503
504
WITH_QEMU_LOCK_GUARD(&tg->lock) {
505
- for (i = 0; i < 2; i++) {
506
- assert(tgm->pending_reqs[i] == 0);
507
- assert(qemu_co_queue_empty(&tgm->throttled_reqs[i]));
508
- assert(!timer_pending(tgm->throttle_timers.timers[i]));
509
- if (tg->tokens[i] == tgm) {
510
+ for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
511
+ assert(tgm->pending_reqs[dir] == 0);
512
+ assert(qemu_co_queue_empty(&tgm->throttled_reqs[dir]));
513
+ assert(!timer_pending(tgm->throttle_timers.timers[dir]));
514
+ if (tg->tokens[dir] == tgm) {
515
token = throttle_group_next_tgm(tgm);
516
/* Take care of the case where this is the last tgm in the group */
517
if (token == tgm) {
518
token = NULL;
519
}
520
- tg->tokens[i] = token;
521
+ tg->tokens[dir] = token;
522
}
523
}
524
525
@@ -XXX,XX +XXX,XX @@ void throttle_group_detach_aio_context(ThrottleGroupMember *tgm)
526
{
527
ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts);
528
ThrottleTimers *tt = &tgm->throttle_timers;
529
- int i;
530
+ ThrottleDirection dir;
531
532
/* Requests must have been drained */
533
- assert(tgm->pending_reqs[0] == 0 && tgm->pending_reqs[1] == 0);
534
- assert(qemu_co_queue_empty(&tgm->throttled_reqs[0]));
535
- assert(qemu_co_queue_empty(&tgm->throttled_reqs[1]));
536
+ for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
537
+ assert(tgm->pending_reqs[dir] == 0);
538
+ assert(qemu_co_queue_empty(&tgm->throttled_reqs[dir]));
539
+ }
540
541
/* Kick off next ThrottleGroupMember, if necessary */
542
WITH_QEMU_LOCK_GUARD(&tg->lock) {
543
- for (i = 0; i < 2; i++) {
544
- if (timer_pending(tt->timers[i])) {
545
- tg->any_timer_armed[i] = false;
546
- schedule_next_request(tgm, i);
547
+ for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
548
+ if (timer_pending(tt->timers[dir])) {
549
+ tg->any_timer_armed[dir] = false;
550
+ schedule_next_request(tgm, dir);
551
}
552
}
553
}
554
diff --git a/block/throttle.c b/block/throttle.c
555
index XXXXXXX..XXXXXXX 100644
556
--- a/block/throttle.c
557
+++ b/block/throttle.c
558
@@ -XXX,XX +XXX,XX @@ throttle_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
559
{
560
561
ThrottleGroupMember *tgm = bs->opaque;
562
- throttle_group_co_io_limits_intercept(tgm, bytes, false);
563
+ throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_READ);
564
565
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
566
}
567
@@ -XXX,XX +XXX,XX @@ throttle_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
568
QEMUIOVector *qiov, BdrvRequestFlags flags)
569
{
570
ThrottleGroupMember *tgm = bs->opaque;
571
- throttle_group_co_io_limits_intercept(tgm, bytes, true);
572
+ throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE);
573
574
return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
575
}
576
@@ -XXX,XX +XXX,XX @@ throttle_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
577
BdrvRequestFlags flags)
578
{
579
ThrottleGroupMember *tgm = bs->opaque;
580
- throttle_group_co_io_limits_intercept(tgm, bytes, true);
581
+ throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE);
582
583
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
584
}
585
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn GRAPH_RDLOCK
586
throttle_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
587
{
588
ThrottleGroupMember *tgm = bs->opaque;
589
- throttle_group_co_io_limits_intercept(tgm, bytes, true);
590
+ throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE);
591
592
return bdrv_co_pdiscard(bs->file, offset, bytes);
593
}
594
--
160
--
595
2.41.0
161
2.26.2
162
diff view generated by jsdifflib
1
From: zhenwei pi <pizhenwei@bytedance.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
Use enum ThrottleDirection instead of number index.
3
Most of our coroutine wrappers already follow this convention:
4
4
5
Reviewed-by: Alberto Garcia <berto@igalia.com>
5
We have 'coroutine_fn bdrv_co_<something>(<normal argument list>)' as
6
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
6
the core function, and a wrapper 'bdrv_<something>(<same argument
7
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
7
list>)' which does parameter packing and calls bdrv_run_co().
8
Message-Id: <20230728022006.1098509-2-pizhenwei@bytedance.com>
8
9
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
9
The only outsiders are the bdrv_prwv_co and
10
bdrv_common_block_status_above wrappers. Let's refactor them to behave
11
as the others; this simplifies further conversion of coroutine wrappers.
12
13
This patch adds an indirection layer, but it will be compensated by
14
a further commit, which will drop bdrv_co_prwv together with the
15
is_write logic, to keep the read and write paths separate.
16
17
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
18
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
19
Reviewed-by: Eric Blake <eblake@redhat.com>
20
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
21
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
22
Message-Id: <20200924185414.28642-3-vsementsov@virtuozzo.com>
10
---
23
---
11
include/qemu/throttle.h | 11 ++++++++---
24
block/io.c | 60 +++++++++++++++++++++++++++++-------------------------
12
util/throttle.c | 16 +++++++++-------
25
1 file changed, 32 insertions(+), 28 deletions(-)
13
2 files changed, 17 insertions(+), 10 deletions(-)
14
26
15
diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
27
diff --git a/block/io.c b/block/io.c
16
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
17
--- a/include/qemu/throttle.h
29
--- a/block/io.c
18
+++ b/include/qemu/throttle.h
30
+++ b/block/io.c
19
@@ -XXX,XX +XXX,XX @@ typedef struct ThrottleState {
31
@@ -XXX,XX +XXX,XX @@ typedef struct RwCo {
20
int64_t previous_leak; /* timestamp of the last leak done */
32
BdrvRequestFlags flags;
21
} ThrottleState;
33
} RwCo;
22
34
23
+typedef enum {
35
+static int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
24
+ THROTTLE_READ = 0,
36
+ QEMUIOVector *qiov, bool is_write,
25
+ THROTTLE_WRITE,
37
+ BdrvRequestFlags flags)
26
+ THROTTLE_MAX
38
+{
27
+} ThrottleDirection;
39
+ if (is_write) {
40
+ return bdrv_co_pwritev(child, offset, qiov->size, qiov, flags);
41
+ } else {
42
+ return bdrv_co_preadv(child, offset, qiov->size, qiov, flags);
43
+ }
44
+}
28
+
45
+
29
typedef struct ThrottleTimers {
46
static int coroutine_fn bdrv_rw_co_entry(void *opaque)
30
- QEMUTimer *timers[2]; /* timers used to do the throttling */
31
+ QEMUTimer *timers[THROTTLE_MAX]; /* timers used to do the throttling */
32
QEMUClockType clock_type; /* the clock used */
33
34
/* Callbacks */
35
- QEMUTimerCB *read_timer_cb;
36
- QEMUTimerCB *write_timer_cb;
37
+ QEMUTimerCB *timer_cb[THROTTLE_MAX];
38
void *timer_opaque;
39
} ThrottleTimers;
40
41
diff --git a/util/throttle.c b/util/throttle.c
42
index XXXXXXX..XXXXXXX 100644
43
--- a/util/throttle.c
44
+++ b/util/throttle.c
45
@@ -XXX,XX +XXX,XX @@ static bool throttle_compute_timer(ThrottleState *ts,
46
void throttle_timers_attach_aio_context(ThrottleTimers *tt,
47
AioContext *new_context)
48
{
47
{
49
- tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
48
RwCo *rwco = opaque;
50
- tt->read_timer_cb, tt->timer_opaque);
49
51
- tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
50
- if (!rwco->is_write) {
52
- tt->write_timer_cb, tt->timer_opaque);
51
- return bdrv_co_preadv(rwco->child, rwco->offset,
53
+ tt->timers[THROTTLE_READ] =
52
- rwco->qiov->size, rwco->qiov,
54
+ aio_timer_new(new_context, tt->clock_type, SCALE_NS,
53
- rwco->flags);
55
+ tt->timer_cb[THROTTLE_READ], tt->timer_opaque);
54
- } else {
56
+ tt->timers[THROTTLE_WRITE] =
55
- return bdrv_co_pwritev(rwco->child, rwco->offset,
57
+ aio_timer_new(new_context, tt->clock_type, SCALE_NS,
56
- rwco->qiov->size, rwco->qiov,
58
+ tt->timer_cb[THROTTLE_WRITE], tt->timer_opaque);
57
- rwco->flags);
58
- }
59
+ return bdrv_co_prwv(rwco->child, rwco->offset, rwco->qiov,
60
+ rwco->is_write, rwco->flags);
59
}
61
}
60
62
61
/*
63
/*
62
@@ -XXX,XX +XXX,XX @@ void throttle_timers_init(ThrottleTimers *tt,
64
* Process a vectored synchronous request using coroutines
63
memset(tt, 0, sizeof(ThrottleTimers));
65
*/
64
66
-static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
65
tt->clock_type = clock_type;
67
- QEMUIOVector *qiov, bool is_write,
66
- tt->read_timer_cb = read_timer_cb;
68
- BdrvRequestFlags flags)
67
- tt->write_timer_cb = write_timer_cb;
69
+static int bdrv_prwv(BdrvChild *child, int64_t offset,
68
+ tt->timer_cb[THROTTLE_READ] = read_timer_cb;
70
+ QEMUIOVector *qiov, bool is_write,
69
+ tt->timer_cb[THROTTLE_WRITE] = write_timer_cb;
71
+ BdrvRequestFlags flags)
70
tt->timer_opaque = timer_opaque;
72
{
71
throttle_timers_attach_aio_context(tt, aio_context);
73
RwCo rwco = {
74
.child = child,
75
@@ -XXX,XX +XXX,XX @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
76
{
77
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
78
79
- return bdrv_prwv_co(child, offset, &qiov, true,
80
- BDRV_REQ_ZERO_WRITE | flags);
81
+ return bdrv_prwv(child, offset, &qiov, true, BDRV_REQ_ZERO_WRITE | flags);
72
}
82
}
73
@@ -XXX,XX +XXX,XX @@ void throttle_timers_detach_aio_context(ThrottleTimers *tt)
83
84
/*
85
@@ -XXX,XX +XXX,XX @@ int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
74
{
86
{
75
int i;
87
int ret;
76
88
77
- for (i = 0; i < 2; i++) {
89
- ret = bdrv_prwv_co(child, offset, qiov, false, 0);
78
+ for (i = 0; i < THROTTLE_MAX; i++) {
90
+ ret = bdrv_prwv(child, offset, qiov, false, 0);
79
throttle_timer_destroy(&tt->timers[i]);
91
if (ret < 0) {
92
return ret;
80
}
93
}
94
@@ -XXX,XX +XXX,XX @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
95
{
96
int ret;
97
98
- ret = bdrv_prwv_co(child, offset, qiov, true, 0);
99
+ ret = bdrv_prwv(child, offset, qiov, true, 0);
100
if (ret < 0) {
101
return ret;
102
}
103
@@ -XXX,XX +XXX,XX @@ early_out:
104
return ret;
81
}
105
}
106
107
-static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
108
- BlockDriverState *base,
109
- bool want_zero,
110
- int64_t offset,
111
- int64_t bytes,
112
- int64_t *pnum,
113
- int64_t *map,
114
- BlockDriverState **file)
115
+static int coroutine_fn
116
+bdrv_co_common_block_status_above(BlockDriverState *bs,
117
+ BlockDriverState *base,
118
+ bool want_zero,
119
+ int64_t offset,
120
+ int64_t bytes,
121
+ int64_t *pnum,
122
+ int64_t *map,
123
+ BlockDriverState **file)
124
{
125
BlockDriverState *p;
126
int ret = 0;
127
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
128
{
129
BdrvCoBlockStatusData *data = opaque;
130
131
- return bdrv_co_block_status_above(data->bs, data->base,
132
- data->want_zero,
133
- data->offset, data->bytes,
134
- data->pnum, data->map, data->file);
135
+ return bdrv_co_common_block_status_above(data->bs, data->base,
136
+ data->want_zero,
137
+ data->offset, data->bytes,
138
+ data->pnum, data->map, data->file);
139
}
140
141
/*
82
--
142
--
83
2.41.0
143
2.26.2
144
diff view generated by jsdifflib
1
We duplicate the same condition three times here, pull it out to the top
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
level.
2
3
3
We are going to keep coroutine-wrappers code (structure-packing
4
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
4
parameters, BDRV_POLL wrapper functions) in separate auto-generated
5
Message-Id: <20230824155345.109765-5-hreitz@redhat.com>
5
files. So, we'll need a header with declaration of original _co_
6
Reviewed-by: Sam Li <faithilikerun@gmail.com>
6
functions, for those which are static now. As well, we'll need
7
declarations for wrapper functions. Do these declarations now, as a
8
preparation step.
9
10
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Reviewed-by: Eric Blake <eblake@redhat.com>
13
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
14
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Message-Id: <20200924185414.28642-4-vsementsov@virtuozzo.com>
7
---
16
---
8
block/file-posix.c | 18 +++++-------------
17
block/coroutines.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++
9
1 file changed, 5 insertions(+), 13 deletions(-)
18
block.c | 8 +++---
10
19
block/io.c | 34 +++++++++++------------
11
diff --git a/block/file-posix.c b/block/file-posix.c
20
3 files changed, 88 insertions(+), 21 deletions(-)
21
create mode 100644 block/coroutines.h
22
23
diff --git a/block/coroutines.h b/block/coroutines.h
24
new file mode 100644
25
index XXXXXXX..XXXXXXX
26
--- /dev/null
27
+++ b/block/coroutines.h
28
@@ -XXX,XX +XXX,XX @@
29
+/*
30
+ * Block layer I/O functions
31
+ *
32
+ * Copyright (c) 2003 Fabrice Bellard
33
+ *
34
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
35
+ * of this software and associated documentation files (the "Software"), to deal
36
+ * in the Software without restriction, including without limitation the rights
37
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
38
+ * copies of the Software, and to permit persons to whom the Software is
39
+ * furnished to do so, subject to the following conditions:
40
+ *
41
+ * The above copyright notice and this permission notice shall be included in
42
+ * all copies or substantial portions of the Software.
43
+ *
44
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
45
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
46
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
47
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
48
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
49
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
50
+ * THE SOFTWARE.
51
+ */
52
+
53
+#ifndef BLOCK_COROUTINES_INT_H
54
+#define BLOCK_COROUTINES_INT_H
55
+
56
+#include "block/block_int.h"
57
+
58
+int coroutine_fn bdrv_co_check(BlockDriverState *bs,
59
+ BdrvCheckResult *res, BdrvCheckMode fix);
60
+int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
61
+
62
+int coroutine_fn
63
+bdrv_co_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
64
+ bool is_write, BdrvRequestFlags flags);
65
+int
66
+bdrv_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
67
+ bool is_write, BdrvRequestFlags flags);
68
+
69
+int coroutine_fn
70
+bdrv_co_common_block_status_above(BlockDriverState *bs,
71
+ BlockDriverState *base,
72
+ bool want_zero,
73
+ int64_t offset,
74
+ int64_t bytes,
75
+ int64_t *pnum,
76
+ int64_t *map,
77
+ BlockDriverState **file);
78
+int
79
+bdrv_common_block_status_above(BlockDriverState *bs,
80
+ BlockDriverState *base,
81
+ bool want_zero,
82
+ int64_t offset,
83
+ int64_t bytes,
84
+ int64_t *pnum,
85
+ int64_t *map,
86
+ BlockDriverState **file);
87
+
88
+int coroutine_fn
89
+bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
90
+ bool is_read);
91
+int
92
+bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
93
+ bool is_read);
94
+
95
+#endif /* BLOCK_COROUTINES_INT_H */
96
diff --git a/block.c b/block.c
12
index XXXXXXX..XXXXXXX 100644
97
index XXXXXXX..XXXXXXX 100644
13
--- a/block/file-posix.c
98
--- a/block.c
14
+++ b/block/file-posix.c
99
+++ b/block.c
15
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
100
@@ -XXX,XX +XXX,XX @@
16
101
#include "qemu/timer.h"
17
out:
102
#include "qemu/cutils.h"
18
#if defined(CONFIG_BLKZONED)
103
#include "qemu/id.h"
19
-{
104
+#include "block/coroutines.h"
20
- BlockZoneWps *wps = bs->wps;
105
21
- if (ret == 0) {
106
#ifdef CONFIG_BSD
22
- if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
107
#include <sys/ioctl.h>
23
- bs->bl.zoned != BLK_Z_NONE) {
108
@@ -XXX,XX +XXX,XX @@ static void bdrv_delete(BlockDriverState *bs)
24
+ if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
109
* free of errors) or -errno when an internal error occurred. The results of the
25
+ bs->bl.zoned != BLK_Z_NONE) {
110
* check are stored in res.
26
+ BlockZoneWps *wps = bs->wps;
111
*/
27
+ if (ret == 0) {
112
-static int coroutine_fn bdrv_co_check(BlockDriverState *bs,
28
uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
113
- BdrvCheckResult *res, BdrvCheckMode fix)
29
if (!BDRV_ZT_IS_CONV(*wp)) {
114
+int coroutine_fn bdrv_co_check(BlockDriverState *bs,
30
if (type & QEMU_AIO_ZONE_APPEND) {
115
+ BdrvCheckResult *res, BdrvCheckMode fix)
31
@@ -XXX,XX +XXX,XX @@ out:
116
{
32
*wp = offset + bytes;
117
if (bs->drv == NULL) {
33
}
118
return -ENOMEDIUM;
34
}
119
@@ -XXX,XX +XXX,XX @@ void bdrv_init_with_whitelist(void)
35
- }
120
bdrv_init();
36
- } else {
121
}
37
- if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
122
38
- bs->bl.zoned != BLK_Z_NONE) {
123
-static int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
39
+ } else {
124
- Error **errp)
40
update_zones_wp(bs, s->fd, 0, 1);
125
+int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
41
}
126
{
42
- }
127
BdrvChild *child, *parent;
43
128
uint64_t perm, shared_perm;
44
- if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
129
diff --git a/block/io.c b/block/io.c
45
- bs->blk.zoned != BLK_Z_NONE) {
130
index XXXXXXX..XXXXXXX 100644
46
qemu_co_mutex_unlock(&wps->colock);
131
--- a/block/io.c
47
}
132
+++ b/block/io.c
48
-}
133
@@ -XXX,XX +XXX,XX @@
49
#endif
134
#include "block/blockjob.h"
135
#include "block/blockjob_int.h"
136
#include "block/block_int.h"
137
+#include "block/coroutines.h"
138
#include "qemu/cutils.h"
139
#include "qapi/error.h"
140
#include "qemu/error-report.h"
141
@@ -XXX,XX +XXX,XX @@ typedef struct RwCo {
142
BdrvRequestFlags flags;
143
} RwCo;
144
145
-static int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
146
- QEMUIOVector *qiov, bool is_write,
147
- BdrvRequestFlags flags)
148
+int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
149
+ QEMUIOVector *qiov, bool is_write,
150
+ BdrvRequestFlags flags)
151
{
152
if (is_write) {
153
return bdrv_co_pwritev(child, offset, qiov->size, qiov, flags);
154
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_rw_co_entry(void *opaque)
155
/*
156
* Process a vectored synchronous request using coroutines
157
*/
158
-static int bdrv_prwv(BdrvChild *child, int64_t offset,
159
- QEMUIOVector *qiov, bool is_write,
160
- BdrvRequestFlags flags)
161
+int bdrv_prwv(BdrvChild *child, int64_t offset,
162
+ QEMUIOVector *qiov, bool is_write,
163
+ BdrvRequestFlags flags)
164
{
165
RwCo rwco = {
166
.child = child,
167
@@ -XXX,XX +XXX,XX @@ early_out:
50
return ret;
168
return ret;
51
}
169
}
170
171
-static int coroutine_fn
172
+int coroutine_fn
173
bdrv_co_common_block_status_above(BlockDriverState *bs,
174
BlockDriverState *base,
175
bool want_zero,
176
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
177
*
178
* See bdrv_co_block_status_above() for details.
179
*/
180
-static int bdrv_common_block_status_above(BlockDriverState *bs,
181
- BlockDriverState *base,
182
- bool want_zero, int64_t offset,
183
- int64_t bytes, int64_t *pnum,
184
- int64_t *map,
185
- BlockDriverState **file)
186
+int bdrv_common_block_status_above(BlockDriverState *bs,
187
+ BlockDriverState *base,
188
+ bool want_zero, int64_t offset,
189
+ int64_t bytes, int64_t *pnum,
190
+ int64_t *map,
191
+ BlockDriverState **file)
192
{
193
BdrvCoBlockStatusData data = {
194
.bs = bs,
195
@@ -XXX,XX +XXX,XX @@ typedef struct BdrvVmstateCo {
196
bool is_read;
197
} BdrvVmstateCo;
198
199
-static int coroutine_fn
200
+int coroutine_fn
201
bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
202
bool is_read)
203
{
204
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
205
return bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
206
}
207
208
-static inline int
209
-bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
210
- bool is_read)
211
+int bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
212
+ bool is_read)
213
{
214
BdrvVmstateCo data = {
215
.bs = bs,
52
--
216
--
53
2.41.0
217
2.26.2
218
diff view generated by jsdifflib
1
This is a regression test for
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
https://bugzilla.redhat.com/show_bug.cgi?id=2234374.
2
3
3
We have a very frequent pattern of creating a coroutine from a function
4
All this test needs to do is trigger an I/O error inside of file-posix
4
with several arguments:
5
(specifically raw_co_prw()). One reliable way to do this without
5
6
requiring special privileges is to use a FUSE export, which allows us to
6
- create a structure to pack parameters
7
inject any error that we want, e.g. via blkdebug.
7
- create _entry function to call original function taking parameters
8
8
from struct
9
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
9
- do different magic to handle completion: set ret to NOT_DONE or
10
Message-Id: <20230824155345.109765-6-hreitz@redhat.com>
10
EINPROGRESS or use separate bool field
11
[hreitz: Fixed test to be skipped when there is no FUSE support, to
11
- fill the struct and create coroutine from _entry function with this
12
suppress fusermount's allow_other warning, and to be skipped
12
struct as a parameter
13
with $IMGOPTSSYNTAX enabled]
13
- do coroutine enter and BDRV_POLL_WHILE loop
14
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
14
15
Let's reduce code duplication by generating coroutine wrappers.
16
17
This patch adds scripts/block-coroutine-wrapper.py together with some
18
friends, which will generate functions with declared prototypes marked
19
by the 'generated_co_wrapper' specifier.
20
21
The usage of new code generation is as follows:
22
23
1. define the coroutine function somewhere
24
25
int coroutine_fn bdrv_co_NAME(...) {...}
26
27
2. declare in some header file
28
29
int generated_co_wrapper bdrv_NAME(...);
30
31
with same list of parameters (generated_co_wrapper is
32
defined in "include/block/block.h").
33
34
3. Make sure the block_gen_c declaration in block/meson.build
35
mentions the file with your marker function.
36
37
Still, no function is now marked, this work is for the following
38
commit.
39
40
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
41
Reviewed-by: Eric Blake <eblake@redhat.com>
42
Message-Id: <20200924185414.28642-5-vsementsov@virtuozzo.com>
43
[Added encoding='utf-8' to open() calls as requested by Vladimir. Fixed
44
typo and grammar issues pointed out by Eric Blake. Removed clang-format
45
dependency that caused build test issues.
46
--Stefan]
47
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
48
---
16
tests/qemu-iotests/tests/file-io-error | 119 +++++++++++++++++++++
49
block/block-gen.h | 49 ++++++++
17
tests/qemu-iotests/tests/file-io-error.out | 33 ++++++
50
include/block/block.h | 10 ++
18
2 files changed, 152 insertions(+)
51
block/meson.build | 8 ++
19
create mode 100755 tests/qemu-iotests/tests/file-io-error
52
docs/devel/block-coroutine-wrapper.rst | 54 ++++++++
20
create mode 100644 tests/qemu-iotests/tests/file-io-error.out
53
docs/devel/index.rst | 1 +
21
54
scripts/block-coroutine-wrapper.py | 167 +++++++++++++++++++++++++
22
diff --git a/tests/qemu-iotests/tests/file-io-error b/tests/qemu-iotests/tests/file-io-error
55
6 files changed, 289 insertions(+)
23
new file mode 100755
56
create mode 100644 block/block-gen.h
24
index XXXXXXX..XXXXXXX
57
create mode 100644 docs/devel/block-coroutine-wrapper.rst
25
--- /dev/null
58
create mode 100644 scripts/block-coroutine-wrapper.py
26
+++ b/tests/qemu-iotests/tests/file-io-error
59
27
@@ -XXX,XX +XXX,XX @@
60
diff --git a/block/block-gen.h b/block/block-gen.h
28
+#!/usr/bin/env bash
29
+# group: rw
30
+#
31
+# Produce an I/O error in file-posix, and hope that it is not catastrophic.
32
+# Regression test for: https://bugzilla.redhat.com/show_bug.cgi?id=2234374
33
+#
34
+# Copyright (C) 2023 Red Hat, Inc.
35
+#
36
+# This program is free software; you can redistribute it and/or modify
37
+# it under the terms of the GNU General Public License as published by
38
+# the Free Software Foundation; either version 2 of the License, or
39
+# (at your option) any later version.
40
+#
41
+# This program is distributed in the hope that it will be useful,
42
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
43
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44
+# GNU General Public License for more details.
45
+#
46
+# You should have received a copy of the GNU General Public License
47
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
48
+#
49
+
50
+seq=$(basename "$0")
51
+echo "QA output created by $seq"
52
+
53
+status=1    # failure is the default!
54
+
55
+_cleanup()
56
+{
57
+ _cleanup_qemu
58
+ rm -f "$TEST_DIR/fuse-export"
59
+}
60
+trap "_cleanup; exit \$status" 0 1 2 3 15
61
+
62
+# get standard environment, filters and checks
63
+. ../common.rc
64
+. ../common.filter
65
+. ../common.qemu
66
+
67
+# Format-agnostic (we do not use any), but we do test the file protocol
68
+_supported_proto file
69
+_require_drivers blkdebug null-co
70
+
71
+if [ "$IMGOPTSSYNTAX" = "true" ]; then
72
+ # We need `$QEMU_IO -f file` to work; IMGOPTSSYNTAX uses --image-opts,
73
+ # breaking -f.
74
+ _unsupported_fmt $IMGFMT
75
+fi
76
+
77
+# This is a regression test of a bug in which file-posix would access zone
78
+# information in case of an I/O error even when there is no zone information,
79
+# resulting in a division by zero.
80
+# To reproduce the problem, we need to trigger an I/O error inside of
81
+# file-posix, which can be done (rootless) by providing a FUSE export that
82
+# presents only errors when accessed.
83
+
84
+_launch_qemu
85
+_send_qemu_cmd $QEMU_HANDLE \
86
+ "{'execute': 'qmp_capabilities'}" \
87
+ 'return'
88
+
89
+_send_qemu_cmd $QEMU_HANDLE \
90
+ "{'execute': 'blockdev-add',
91
+ 'arguments': {
92
+ 'driver': 'blkdebug',
93
+ 'node-name': 'node0',
94
+ 'inject-error': [{'event': 'none'}],
95
+ 'image': {
96
+ 'driver': 'null-co'
97
+ }
98
+ }}" \
99
+ 'return'
100
+
101
+# FUSE mountpoint must exist and be a regular file
102
+touch "$TEST_DIR/fuse-export"
103
+
104
+# The grep -v to filter fusermount's (benign) error when /etc/fuse.conf does
105
+# not contain user_allow_other and the subsequent check for missing FUSE support
106
+# have both been taken from iotest 308.
107
+output=$(_send_qemu_cmd $QEMU_HANDLE \
108
+ "{'execute': 'block-export-add',
109
+ 'arguments': {
110
+ 'id': 'exp0',
111
+ 'type': 'fuse',
112
+ 'node-name': 'node0',
113
+ 'mountpoint': '$TEST_DIR/fuse-export',
114
+ 'writable': true
115
+ }}" \
116
+ 'return' \
117
+ | grep -v 'option allow_other only allowed if')
118
+
119
+if echo "$output" | grep -q "Parameter 'type' does not accept value 'fuse'"; then
120
+ _notrun 'No FUSE support'
121
+fi
122
+echo "$output"
123
+
124
+echo
125
+# This should fail, but gracefully, i.e. just print an I/O error, not crash.
126
+$QEMU_IO -f file -c 'write 0 64M' "$TEST_DIR/fuse-export" | _filter_qemu_io
127
+echo
128
+
129
+_send_qemu_cmd $QEMU_HANDLE \
130
+ "{'execute': 'block-export-del',
131
+ 'arguments': {'id': 'exp0'}}" \
132
+ 'return'
133
+
134
+_send_qemu_cmd $QEMU_HANDLE \
135
+ '' \
136
+ 'BLOCK_EXPORT_DELETED'
137
+
138
+_send_qemu_cmd $QEMU_HANDLE \
139
+ "{'execute': 'blockdev-del',
140
+ 'arguments': {'node-name': 'node0'}}" \
141
+ 'return'
142
+
143
+# success, all done
144
+echo "*** done"
145
+rm -f $seq.full
146
+status=0
147
diff --git a/tests/qemu-iotests/tests/file-io-error.out b/tests/qemu-iotests/tests/file-io-error.out
148
new file mode 100644
61
new file mode 100644
149
index XXXXXXX..XXXXXXX
62
index XXXXXXX..XXXXXXX
150
--- /dev/null
63
--- /dev/null
151
+++ b/tests/qemu-iotests/tests/file-io-error.out
64
+++ b/block/block-gen.h
152
@@ -XXX,XX +XXX,XX @@
65
@@ -XXX,XX +XXX,XX @@
153
+QA output created by file-io-error
66
+/*
154
+{'execute': 'qmp_capabilities'}
67
+ * Block coroutine wrapping core, used by auto-generated block/block-gen.c
155
+{"return": {}}
68
+ *
156
+{'execute': 'blockdev-add',
69
+ * Copyright (c) 2003 Fabrice Bellard
157
+ 'arguments': {
70
+ * Copyright (c) 2020 Virtuozzo International GmbH
158
+ 'driver': 'blkdebug',
71
+ *
159
+ 'node-name': 'node0',
72
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
160
+ 'inject-error': [{'event': 'none'}],
73
+ * of this software and associated documentation files (the "Software"), to deal
161
+ 'image': {
74
+ * in the Software without restriction, including without limitation the rights
162
+ 'driver': 'null-co'
75
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
163
+ }
76
+ * copies of the Software, and to permit persons to whom the Software is
164
+ }}
77
+ * furnished to do so, subject to the following conditions:
165
+{"return": {}}
78
+ *
166
+{'execute': 'block-export-add',
79
+ * The above copyright notice and this permission notice shall be included in
167
+ 'arguments': {
80
+ * all copies or substantial portions of the Software.
168
+ 'id': 'exp0',
81
+ *
169
+ 'type': 'fuse',
82
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
170
+ 'node-name': 'node0',
83
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
171
+ 'mountpoint': 'TEST_DIR/fuse-export',
84
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
172
+ 'writable': true
85
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
173
+ }}
86
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
174
+{"return": {}}
87
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
175
+
88
+ * THE SOFTWARE.
176
+write failed: Input/output error
89
+ */
177
+
90
+
178
+{'execute': 'block-export-del',
91
+#ifndef BLOCK_BLOCK_GEN_H
179
+ 'arguments': {'id': 'exp0'}}
92
+#define BLOCK_BLOCK_GEN_H
180
+{"return": {}}
93
+
181
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "exp0"}}
94
+#include "block/block_int.h"
182
+{'execute': 'blockdev-del',
95
+
183
+ 'arguments': {'node-name': 'node0'}}
96
+/* Base structure for argument packing structures */
184
+{"return": {}}
97
+typedef struct BdrvPollCo {
185
+*** done
98
+ BlockDriverState *bs;
99
+ bool in_progress;
100
+ int ret;
101
+ Coroutine *co; /* Keep pointer here for debugging */
102
+} BdrvPollCo;
103
+
104
+static inline int bdrv_poll_co(BdrvPollCo *s)
105
+{
106
+ assert(!qemu_in_coroutine());
107
+
108
+ bdrv_coroutine_enter(s->bs, s->co);
109
+ BDRV_POLL_WHILE(s->bs, s->in_progress);
110
+
111
+ return s->ret;
112
+}
113
+
114
+#endif /* BLOCK_BLOCK_GEN_H */
115
diff --git a/include/block/block.h b/include/block/block.h
116
index XXXXXXX..XXXXXXX 100644
117
--- a/include/block/block.h
118
+++ b/include/block/block.h
119
@@ -XXX,XX +XXX,XX @@
120
#include "block/blockjob.h"
121
#include "qemu/hbitmap.h"
122
123
+/*
124
+ * generated_co_wrapper
125
+ *
126
+ * Function specifier, which does nothing but mark functions to be
127
+ * generated by scripts/block-coroutine-wrapper.py
128
+ *
129
+ * Read more in docs/devel/block-coroutine-wrapper.rst
130
+ */
131
+#define generated_co_wrapper
132
+
133
/* block.c */
134
typedef struct BlockDriver BlockDriver;
135
typedef struct BdrvChild BdrvChild;
136
diff --git a/block/meson.build b/block/meson.build
137
index XXXXXXX..XXXXXXX 100644
138
--- a/block/meson.build
139
+++ b/block/meson.build
140
@@ -XXX,XX +XXX,XX @@ module_block_h = custom_target('module_block.h',
141
command: [module_block_py, '@OUTPUT0@', modsrc])
142
block_ss.add(module_block_h)
143
144
+wrapper_py = find_program('../scripts/block-coroutine-wrapper.py')
145
+block_gen_c = custom_target('block-gen.c',
146
+ output: 'block-gen.c',
147
+ input: files('../include/block/block.h',
148
+ 'coroutines.h'),
149
+ command: [wrapper_py, '@OUTPUT@', '@INPUT@'])
150
+block_ss.add(block_gen_c)
151
+
152
block_ss.add(files('stream.c'))
153
154
softmmu_ss.add(files('qapi-sysemu.c'))
155
diff --git a/docs/devel/block-coroutine-wrapper.rst b/docs/devel/block-coroutine-wrapper.rst
156
new file mode 100644
157
index XXXXXXX..XXXXXXX
158
--- /dev/null
159
+++ b/docs/devel/block-coroutine-wrapper.rst
160
@@ -XXX,XX +XXX,XX @@
161
+=======================
162
+block-coroutine-wrapper
163
+=======================
164
+
165
+A lot of functions in QEMU block layer (see ``block/*``) can only be
166
+called in coroutine context. Such functions are normally marked by the
167
+coroutine_fn specifier. Still, sometimes we need to call them from
168
+non-coroutine context; for this we need to start a coroutine, run the
169
+needed function from it and wait for the coroutine to finish in a
170
+BDRV_POLL_WHILE() loop. To run a coroutine we need a function with one
171
+void* argument. So for each coroutine_fn function which needs a
172
+non-coroutine interface, we should define a structure to pack the
173
+parameters, define a separate function to unpack the parameters and
174
+call the original function and finally define a new interface function
175
+with same list of arguments as original one, which will pack the
176
+parameters into a struct, create a coroutine, run it and wait in
177
+BDRV_POLL_WHILE() loop. It's boring to create such wrappers by hand,
178
+so we have a script to generate them.
179
+
180
+Usage
181
+=====
182
+
183
+Assume we have defined the ``coroutine_fn`` function
184
+``bdrv_co_foo(<some args>)`` and need a non-coroutine interface for it,
185
+called ``bdrv_foo(<same args>)``. In this case the script can help. To
186
+trigger the generation:
187
+
188
+1. You need ``bdrv_foo`` declaration somewhere (for example, in
189
+ ``block/coroutines.h``) with the ``generated_co_wrapper`` mark,
190
+ like this:
191
+
192
+.. code-block:: c
193
+
194
+ int generated_co_wrapper bdrv_foo(<some args>);
195
+
196
+2. You need to feed this declaration to block-coroutine-wrapper script.
197
+ For this, add the .h (or .c) file with the declaration to the
198
+ ``input: files(...)`` list of ``block_gen_c`` target declaration in
199
+ ``block/meson.build``
200
+
201
+You are done. During the build, coroutine wrappers will be generated in
202
+``<BUILD_DIR>/block/block-gen.c``.
203
+
204
+Links
205
+=====
206
+
207
+1. The script location is ``scripts/block-coroutine-wrapper.py``.
208
+
209
+2. Generic place for private ``generated_co_wrapper`` declarations is
210
+ ``block/coroutines.h``, for public declarations:
211
+ ``include/block/block.h``
212
+
213
+3. The core API of generated coroutine wrappers is placed in
214
+ (not generated) ``block/block-gen.h``
215
diff --git a/docs/devel/index.rst b/docs/devel/index.rst
216
index XXXXXXX..XXXXXXX 100644
217
--- a/docs/devel/index.rst
218
+++ b/docs/devel/index.rst
219
@@ -XXX,XX +XXX,XX @@ Contents:
220
s390-dasd-ipl
221
clocks
222
qom
223
+ block-coroutine-wrapper
224
diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py
225
new file mode 100644
226
index XXXXXXX..XXXXXXX
227
--- /dev/null
228
+++ b/scripts/block-coroutine-wrapper.py
229
@@ -XXX,XX +XXX,XX @@
230
+#! /usr/bin/env python3
231
+"""Generate coroutine wrappers for block subsystem.
232
+
233
+The program parses one or several C files passed on the command line,
234
+searches for functions with the 'generated_co_wrapper' specifier
235
+and writes the corresponding wrappers to the given output file.
236
+
237
+Usage: block-coroutine-wrapper.py generated-file.c FILE.[ch]...
238
+
239
+Copyright (c) 2020 Virtuozzo International GmbH.
240
+
241
+This program is free software; you can redistribute it and/or modify
242
+it under the terms of the GNU General Public License as published by
243
+the Free Software Foundation; either version 2 of the License, or
244
+(at your option) any later version.
245
+
246
+This program is distributed in the hope that it will be useful,
247
+but WITHOUT ANY WARRANTY; without even the implied warranty of
248
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
249
+GNU General Public License for more details.
250
+
251
+You should have received a copy of the GNU General Public License
252
+along with this program. If not, see <http://www.gnu.org/licenses/>.
253
+"""
254
+
255
+import sys
256
+import re
257
+from typing import Iterator
258
+
259
+
260
+def gen_header():
261
+ copyright = re.sub('^.*Copyright', 'Copyright', __doc__, flags=re.DOTALL)
262
+ copyright = re.sub('^(?=.)', ' * ', copyright.strip(), flags=re.MULTILINE)
263
+ copyright = re.sub('^$', ' *', copyright, flags=re.MULTILINE)
264
+ return f"""\
265
+/*
266
+ * File is generated by scripts/block-coroutine-wrapper.py
267
+ *
268
+{copyright}
269
+ */
270
+
271
+#include "qemu/osdep.h"
272
+#include "block/coroutines.h"
273
+#include "block/block-gen.h"
274
+#include "block/block_int.h"\
275
+"""
276
+
277
+
278
+class ParamDecl:
279
+ param_re = re.compile(r'(?P<decl>'
280
+ r'(?P<type>.*[ *])'
281
+ r'(?P<name>[a-z][a-z0-9_]*)'
282
+ r')')
283
+
284
+ def __init__(self, param_decl: str) -> None:
285
+ m = self.param_re.match(param_decl.strip())
286
+ if m is None:
287
+ raise ValueError(f'Wrong parameter declaration: "{param_decl}"')
288
+ self.decl = m.group('decl')
289
+ self.type = m.group('type')
290
+ self.name = m.group('name')
291
+
292
+
293
+class FuncDecl:
294
+ def __init__(self, return_type: str, name: str, args: str) -> None:
295
+ self.return_type = return_type.strip()
296
+ self.name = name.strip()
297
+ self.args = [ParamDecl(arg.strip()) for arg in args.split(',')]
298
+
299
+ def gen_list(self, format: str) -> str:
300
+ return ', '.join(format.format_map(arg.__dict__) for arg in self.args)
301
+
302
+ def gen_block(self, format: str) -> str:
303
+ return '\n'.join(format.format_map(arg.__dict__) for arg in self.args)
304
+
305
+
306
+# Match wrappers declared with a generated_co_wrapper mark
307
+func_decl_re = re.compile(r'^int\s*generated_co_wrapper\s*'
308
+ r'(?P<wrapper_name>[a-z][a-z0-9_]*)'
309
+ r'\((?P<args>[^)]*)\);$', re.MULTILINE)
310
+
311
+
312
+def func_decl_iter(text: str) -> Iterator:
313
+ for m in func_decl_re.finditer(text):
314
+ yield FuncDecl(return_type='int',
315
+ name=m.group('wrapper_name'),
316
+ args=m.group('args'))
317
+
318
+
319
+def snake_to_camel(func_name: str) -> str:
320
+ """
321
+ Convert underscore names like 'some_function_name' to camel-case like
322
+ 'SomeFunctionName'
323
+ """
324
+ words = func_name.split('_')
325
+ words = [w[0].upper() + w[1:] for w in words]
326
+ return ''.join(words)
327
+
328
+
329
+def gen_wrapper(func: FuncDecl) -> str:
330
+ assert func.name.startswith('bdrv_')
331
+ assert not func.name.startswith('bdrv_co_')
332
+ assert func.return_type == 'int'
333
+ assert func.args[0].type in ['BlockDriverState *', 'BdrvChild *']
334
+
335
+ name = 'bdrv_co_' + func.name[5:]
336
+ bs = 'bs' if func.args[0].type == 'BlockDriverState *' else 'child->bs'
337
+ struct_name = snake_to_camel(name)
338
+
339
+ return f"""\
340
+/*
341
+ * Wrappers for {name}
342
+ */
343
+
344
+typedef struct {struct_name} {{
345
+ BdrvPollCo poll_state;
346
+{ func.gen_block(' {decl};') }
347
+}} {struct_name};
348
+
349
+static void coroutine_fn {name}_entry(void *opaque)
350
+{{
351
+ {struct_name} *s = opaque;
352
+
353
+ s->poll_state.ret = {name}({ func.gen_list('s->{name}') });
354
+ s->poll_state.in_progress = false;
355
+
356
+ aio_wait_kick();
357
+}}
358
+
359
+int {func.name}({ func.gen_list('{decl}') })
360
+{{
361
+ if (qemu_in_coroutine()) {{
362
+ return {name}({ func.gen_list('{name}') });
363
+ }} else {{
364
+ {struct_name} s = {{
365
+ .poll_state.bs = {bs},
366
+ .poll_state.in_progress = true,
367
+
368
+{ func.gen_block(' .{name} = {name},') }
369
+ }};
370
+
371
+ s.poll_state.co = qemu_coroutine_create({name}_entry, &s);
372
+
373
+ return bdrv_poll_co(&s.poll_state);
374
+ }}
375
+}}"""
376
+
377
+
378
+def gen_wrappers(input_code: str) -> str:
379
+ res = ''
380
+ for func in func_decl_iter(input_code):
381
+ res += '\n\n\n'
382
+ res += gen_wrapper(func)
383
+
384
+ return res
385
+
386
+
387
+if __name__ == '__main__':
388
+ if len(sys.argv) < 3:
389
+ exit(f'Usage: {sys.argv[0]} OUT_FILE.c IN_FILE.[ch]...')
390
+
391
+ with open(sys.argv[1], 'w', encoding='utf-8') as f_out:
392
+ f_out.write(gen_header())
393
+ for fname in sys.argv[2:]:
394
+ with open(fname, encoding='utf-8') as f_in:
395
+ f_out.write(gen_wrappers(f_in.read()))
396
+ f_out.write('\n')
186
--
397
--
187
2.41.0
398
2.26.2
399
diff view generated by jsdifflib
1
From: zhenwei pi <pizhenwei@bytedance.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
Use enum ThrottleDirection instead in the throttle test codes.
3
Use code generation implemented in previous commit to generate
4
coroutine wrappers in block.c and block/io.c
4
5
5
Reviewed-by: Alberto Garcia <berto@igalia.com>
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
8
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Message-Id: <20230728022006.1098509-3-pizhenwei@bytedance.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
10
Message-Id: <20200924185414.28642-6-vsementsov@virtuozzo.com>
10
---
11
---
11
tests/unit/test-throttle.c | 6 +++---
12
block/coroutines.h | 6 +-
12
1 file changed, 3 insertions(+), 3 deletions(-)
13
include/block/block.h | 16 ++--
14
block.c | 73 ---------------
15
block/io.c | 212 ------------------------------------------
16
4 files changed, 13 insertions(+), 294 deletions(-)
13
17
14
diff --git a/tests/unit/test-throttle.c b/tests/unit/test-throttle.c
18
diff --git a/block/coroutines.h b/block/coroutines.h
15
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
16
--- a/tests/unit/test-throttle.c
20
--- a/block/coroutines.h
17
+++ b/tests/unit/test-throttle.c
21
+++ b/block/coroutines.h
18
@@ -XXX,XX +XXX,XX @@ static void test_init(void)
22
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
19
23
int coroutine_fn
20
/* check initialized fields */
24
bdrv_co_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
21
g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
25
bool is_write, BdrvRequestFlags flags);
22
- g_assert(tt->timers[0]);
26
-int
23
- g_assert(tt->timers[1]);
27
+int generated_co_wrapper
24
+ g_assert(tt->timers[THROTTLE_READ]);
28
bdrv_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
25
+ g_assert(tt->timers[THROTTLE_WRITE]);
29
bool is_write, BdrvRequestFlags flags);
26
30
27
/* check other fields where cleared */
31
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
28
g_assert(!ts.previous_leak);
32
int64_t *pnum,
29
@@ -XXX,XX +XXX,XX @@ static void test_destroy(void)
33
int64_t *map,
30
throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
34
BlockDriverState **file);
31
read_timer_cb, write_timer_cb, &ts);
35
-int
32
throttle_timers_destroy(tt);
36
+int generated_co_wrapper
33
- for (i = 0; i < 2; i++) {
37
bdrv_common_block_status_above(BlockDriverState *bs,
34
+ for (i = 0; i < THROTTLE_MAX; i++) {
38
BlockDriverState *base,
35
g_assert(!tt->timers[i]);
39
bool want_zero,
40
@@ -XXX,XX +XXX,XX @@ bdrv_common_block_status_above(BlockDriverState *bs,
41
int coroutine_fn
42
bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
43
bool is_read);
44
-int
45
+int generated_co_wrapper
46
bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
47
bool is_read);
48
49
diff --git a/include/block/block.h b/include/block/block.h
50
index XXXXXXX..XXXXXXX 100644
51
--- a/include/block/block.h
52
+++ b/include/block/block.h
53
@@ -XXX,XX +XXX,XX @@ void bdrv_refresh_filename(BlockDriverState *bs);
54
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
55
PreallocMode prealloc, BdrvRequestFlags flags,
56
Error **errp);
57
-int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
58
- PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
59
+int generated_co_wrapper
60
+bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
61
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
62
63
int64_t bdrv_nb_sectors(BlockDriverState *bs);
64
int64_t bdrv_getlength(BlockDriverState *bs);
65
@@ -XXX,XX +XXX,XX @@ typedef enum {
66
BDRV_FIX_ERRORS = 2,
67
} BdrvCheckMode;
68
69
-int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
70
+int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
71
+ BdrvCheckMode fix);
72
73
/* The units of offset and total_work_size may be chosen arbitrarily by the
74
* block driver; total_work_size may change during the course of the amendment
75
@@ -XXX,XX +XXX,XX @@ void bdrv_aio_cancel_async(BlockAIOCB *acb);
76
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
77
78
/* Invalidate any cached metadata used by image formats */
79
-int bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
80
+int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
81
+ Error **errp);
82
void bdrv_invalidate_cache_all(Error **errp);
83
int bdrv_inactivate_all(void);
84
85
/* Ensure contents are flushed to disk. */
86
-int bdrv_flush(BlockDriverState *bs);
87
+int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
88
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
89
int bdrv_flush_all(void);
90
void bdrv_close_all(void);
91
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all(void);
92
AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
93
cond); })
94
95
-int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
96
+int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
97
+ int64_t bytes);
98
int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
99
int bdrv_has_zero_init_1(BlockDriverState *bs);
100
int bdrv_has_zero_init(BlockDriverState *bs);
101
diff --git a/block.c b/block.c
102
index XXXXXXX..XXXXXXX 100644
103
--- a/block.c
104
+++ b/block.c
105
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs,
106
return bs->drv->bdrv_co_check(bs, res, fix);
107
}
108
109
-typedef struct CheckCo {
110
- BlockDriverState *bs;
111
- BdrvCheckResult *res;
112
- BdrvCheckMode fix;
113
- int ret;
114
-} CheckCo;
115
-
116
-static void coroutine_fn bdrv_check_co_entry(void *opaque)
117
-{
118
- CheckCo *cco = opaque;
119
- cco->ret = bdrv_co_check(cco->bs, cco->res, cco->fix);
120
- aio_wait_kick();
121
-}
122
-
123
-int bdrv_check(BlockDriverState *bs,
124
- BdrvCheckResult *res, BdrvCheckMode fix)
125
-{
126
- Coroutine *co;
127
- CheckCo cco = {
128
- .bs = bs,
129
- .res = res,
130
- .ret = -EINPROGRESS,
131
- .fix = fix,
132
- };
133
-
134
- if (qemu_in_coroutine()) {
135
- /* Fast-path if already in coroutine context */
136
- bdrv_check_co_entry(&cco);
137
- } else {
138
- co = qemu_coroutine_create(bdrv_check_co_entry, &cco);
139
- bdrv_coroutine_enter(bs, co);
140
- BDRV_POLL_WHILE(bs, cco.ret == -EINPROGRESS);
141
- }
142
-
143
- return cco.ret;
144
-}
145
-
146
/*
147
* Return values:
148
* 0 - success
149
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
150
return 0;
151
}
152
153
-typedef struct InvalidateCacheCo {
154
- BlockDriverState *bs;
155
- Error **errp;
156
- bool done;
157
- int ret;
158
-} InvalidateCacheCo;
159
-
160
-static void coroutine_fn bdrv_invalidate_cache_co_entry(void *opaque)
161
-{
162
- InvalidateCacheCo *ico = opaque;
163
- ico->ret = bdrv_co_invalidate_cache(ico->bs, ico->errp);
164
- ico->done = true;
165
- aio_wait_kick();
166
-}
167
-
168
-int bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
169
-{
170
- Coroutine *co;
171
- InvalidateCacheCo ico = {
172
- .bs = bs,
173
- .done = false,
174
- .errp = errp
175
- };
176
-
177
- if (qemu_in_coroutine()) {
178
- /* Fast-path if already in coroutine context */
179
- bdrv_invalidate_cache_co_entry(&ico);
180
- } else {
181
- co = qemu_coroutine_create(bdrv_invalidate_cache_co_entry, &ico);
182
- bdrv_coroutine_enter(bs, co);
183
- BDRV_POLL_WHILE(bs, !ico.done);
184
- }
185
-
186
- return ico.ret;
187
-}
188
-
189
void bdrv_invalidate_cache_all(Error **errp)
190
{
191
BlockDriverState *bs;
192
diff --git a/block/io.c b/block/io.c
193
index XXXXXXX..XXXXXXX 100644
194
--- a/block/io.c
195
+++ b/block/io.c
196
@@ -XXX,XX +XXX,XX @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
197
return 0;
198
}
199
200
-typedef int coroutine_fn BdrvRequestEntry(void *opaque);
201
-typedef struct BdrvRunCo {
202
- BdrvRequestEntry *entry;
203
- void *opaque;
204
- int ret;
205
- bool done;
206
- Coroutine *co; /* Coroutine, running bdrv_run_co_entry, for debugging */
207
-} BdrvRunCo;
208
-
209
-static void coroutine_fn bdrv_run_co_entry(void *opaque)
210
-{
211
- BdrvRunCo *arg = opaque;
212
-
213
- arg->ret = arg->entry(arg->opaque);
214
- arg->done = true;
215
- aio_wait_kick();
216
-}
217
-
218
-static int bdrv_run_co(BlockDriverState *bs, BdrvRequestEntry *entry,
219
- void *opaque)
220
-{
221
- if (qemu_in_coroutine()) {
222
- /* Fast-path if already in coroutine context */
223
- return entry(opaque);
224
- } else {
225
- BdrvRunCo s = { .entry = entry, .opaque = opaque };
226
-
227
- s.co = qemu_coroutine_create(bdrv_run_co_entry, &s);
228
- bdrv_coroutine_enter(bs, s.co);
229
-
230
- BDRV_POLL_WHILE(bs, !s.done);
231
-
232
- return s.ret;
233
- }
234
-}
235
-
236
-typedef struct RwCo {
237
- BdrvChild *child;
238
- int64_t offset;
239
- QEMUIOVector *qiov;
240
- bool is_write;
241
- BdrvRequestFlags flags;
242
-} RwCo;
243
-
244
int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
245
QEMUIOVector *qiov, bool is_write,
246
BdrvRequestFlags flags)
247
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
36
}
248
}
37
}
249
}
250
251
-static int coroutine_fn bdrv_rw_co_entry(void *opaque)
252
-{
253
- RwCo *rwco = opaque;
254
-
255
- return bdrv_co_prwv(rwco->child, rwco->offset, rwco->qiov,
256
- rwco->is_write, rwco->flags);
257
-}
258
-
259
-/*
260
- * Process a vectored synchronous request using coroutines
261
- */
262
-int bdrv_prwv(BdrvChild *child, int64_t offset,
263
- QEMUIOVector *qiov, bool is_write,
264
- BdrvRequestFlags flags)
265
-{
266
- RwCo rwco = {
267
- .child = child,
268
- .offset = offset,
269
- .qiov = qiov,
270
- .is_write = is_write,
271
- .flags = flags,
272
- };
273
-
274
- return bdrv_run_co(child->bs, bdrv_rw_co_entry, &rwco);
275
-}
276
-
277
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
278
int bytes, BdrvRequestFlags flags)
279
{
280
@@ -XXX,XX +XXX,XX @@ int bdrv_flush_all(void)
281
return result;
282
}
283
284
-
285
-typedef struct BdrvCoBlockStatusData {
286
- BlockDriverState *bs;
287
- BlockDriverState *base;
288
- bool want_zero;
289
- int64_t offset;
290
- int64_t bytes;
291
- int64_t *pnum;
292
- int64_t *map;
293
- BlockDriverState **file;
294
-} BdrvCoBlockStatusData;
295
-
296
/*
297
* Returns the allocation status of the specified sectors.
298
* Drivers not implementing the functionality are assumed to not support
299
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
300
return ret;
301
}
302
303
-/* Coroutine wrapper for bdrv_block_status_above() */
304
-static int coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
305
-{
306
- BdrvCoBlockStatusData *data = opaque;
307
-
308
- return bdrv_co_common_block_status_above(data->bs, data->base,
309
- data->want_zero,
310
- data->offset, data->bytes,
311
- data->pnum, data->map, data->file);
312
-}
313
-
314
-/*
315
- * Synchronous wrapper around bdrv_co_block_status_above().
316
- *
317
- * See bdrv_co_block_status_above() for details.
318
- */
319
-int bdrv_common_block_status_above(BlockDriverState *bs,
320
- BlockDriverState *base,
321
- bool want_zero, int64_t offset,
322
- int64_t bytes, int64_t *pnum,
323
- int64_t *map,
324
- BlockDriverState **file)
325
-{
326
- BdrvCoBlockStatusData data = {
327
- .bs = bs,
328
- .base = base,
329
- .want_zero = want_zero,
330
- .offset = offset,
331
- .bytes = bytes,
332
- .pnum = pnum,
333
- .map = map,
334
- .file = file,
335
- };
336
-
337
- return bdrv_run_co(bs, bdrv_block_status_above_co_entry, &data);
338
-}
339
-
340
int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
341
int64_t offset, int64_t bytes, int64_t *pnum,
342
int64_t *map, BlockDriverState **file)
343
@@ -XXX,XX +XXX,XX @@ int bdrv_is_allocated_above(BlockDriverState *top,
344
return 0;
345
}
346
347
-typedef struct BdrvVmstateCo {
348
- BlockDriverState *bs;
349
- QEMUIOVector *qiov;
350
- int64_t pos;
351
- bool is_read;
352
-} BdrvVmstateCo;
353
-
354
int coroutine_fn
355
bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
356
bool is_read)
357
@@ -XXX,XX +XXX,XX @@ bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
358
return ret;
359
}
360
361
-static int coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
362
-{
363
- BdrvVmstateCo *co = opaque;
364
-
365
- return bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
366
-}
367
-
368
-int bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
369
- bool is_read)
370
-{
371
- BdrvVmstateCo data = {
372
- .bs = bs,
373
- .qiov = qiov,
374
- .pos = pos,
375
- .is_read = is_read,
376
- };
377
-
378
- return bdrv_run_co(bs, bdrv_co_rw_vmstate_entry, &data);
379
-}
380
-
381
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
382
int64_t pos, int size)
383
{
384
@@ -XXX,XX +XXX,XX @@ void bdrv_aio_cancel_async(BlockAIOCB *acb)
385
/**************************************************************/
386
/* Coroutine block device emulation */
387
388
-static int coroutine_fn bdrv_flush_co_entry(void *opaque)
389
-{
390
- return bdrv_co_flush(opaque);
391
-}
392
-
393
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
394
{
395
BdrvChild *primary_child = bdrv_primary_child(bs);
396
@@ -XXX,XX +XXX,XX @@ early_exit:
397
return ret;
398
}
399
400
-int bdrv_flush(BlockDriverState *bs)
401
-{
402
- return bdrv_run_co(bs, bdrv_flush_co_entry, bs);
403
-}
404
-
405
-typedef struct DiscardCo {
406
- BdrvChild *child;
407
- int64_t offset;
408
- int64_t bytes;
409
-} DiscardCo;
410
-
411
-static int coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
412
-{
413
- DiscardCo *rwco = opaque;
414
-
415
- return bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes);
416
-}
417
-
418
int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
419
int64_t bytes)
420
{
421
@@ -XXX,XX +XXX,XX @@ out:
422
return ret;
423
}
424
425
-int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes)
426
-{
427
- DiscardCo rwco = {
428
- .child = child,
429
- .offset = offset,
430
- .bytes = bytes,
431
- };
432
-
433
- return bdrv_run_co(child->bs, bdrv_pdiscard_co_entry, &rwco);
434
-}
435
-
436
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
437
{
438
BlockDriver *drv = bs->drv;
439
@@ -XXX,XX +XXX,XX @@ out:
440
441
return ret;
442
}
443
-
444
-typedef struct TruncateCo {
445
- BdrvChild *child;
446
- int64_t offset;
447
- bool exact;
448
- PreallocMode prealloc;
449
- BdrvRequestFlags flags;
450
- Error **errp;
451
-} TruncateCo;
452
-
453
-static int coroutine_fn bdrv_truncate_co_entry(void *opaque)
454
-{
455
- TruncateCo *tco = opaque;
456
-
457
- return bdrv_co_truncate(tco->child, tco->offset, tco->exact,
458
- tco->prealloc, tco->flags, tco->errp);
459
-}
460
-
461
-int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
462
- PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
463
-{
464
- TruncateCo tco = {
465
- .child = child,
466
- .offset = offset,
467
- .exact = exact,
468
- .prealloc = prealloc,
469
- .flags = flags,
470
- .errp = errp,
471
- };
472
-
473
- return bdrv_run_co(child->bs, bdrv_truncate_co_entry, &tco);
474
-}
38
--
475
--
39
2.41.0
476
2.26.2
477
diff view generated by jsdifflib
1
From: zhenwei pi <pizhenwei@bytedance.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
Only one direction is necessary in several scenarios:
3
Now that we are not maintaining boilerplate code for coroutine
4
- a read-only disk
4
wrappers, there is no more sense in keeping the extra indirection layer
5
- operations on a device are considered as *write* only. For example,
5
of bdrv_prwv(). Let's drop it and instead generate pure bdrv_preadv()
6
encrypt/decrypt/sign/verify operations on a cryptodev use a single
6
and bdrv_pwritev().
7
*write* timer (read timer callback is defined, but never invoked).
8
7
9
Allow a single direction in throttle; this reduces memory, and the upper layer
8
Currently, bdrv_pwritev() and bdrv_preadv() are returning bytes on
10
does not need a dummy callback any more.
9
success, auto generated functions will instead return zero, as their
10
_co_ prototype. Still, it's simple to make the conversion safe: the
11
only external user of bdrv_pwritev() is test-bdrv-drain, and it is
12
comfortable enough with bdrv_co_pwritev() instead. So prototypes are
13
moved to local block/coroutines.h. Next, the only internal use is
14
bdrv_pread() and bdrv_pwrite(), which are modified to return bytes on
15
success.
11
16
12
Reviewed-by: Alberto Garcia <berto@igalia.com>
17
Of course, it would be great to convert bdrv_pread() and bdrv_pwrite()
13
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
18
to return 0 on success. But this requires audit (and probably
14
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
19
conversion) of all their users, let's leave it for another day
15
Message-Id: <20230728022006.1098509-4-pizhenwei@bytedance.com>
20
refactoring.
16
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
21
22
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
23
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
24
Reviewed-by: Eric Blake <eblake@redhat.com>
25
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
26
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
27
Message-Id: <20200924185414.28642-7-vsementsov@virtuozzo.com>
17
---
28
---
18
util/throttle.c | 42 ++++++++++++++++++++++++++++--------------
29
block/coroutines.h | 10 ++++-----
19
1 file changed, 28 insertions(+), 14 deletions(-)
30
include/block/block.h | 2 --
31
block/io.c | 49 ++++++++---------------------------------
32
tests/test-bdrv-drain.c | 2 +-
33
4 files changed, 15 insertions(+), 48 deletions(-)
20
34
21
diff --git a/util/throttle.c b/util/throttle.c
35
diff --git a/block/coroutines.h b/block/coroutines.h
22
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
23
--- a/util/throttle.c
37
--- a/block/coroutines.h
24
+++ b/util/throttle.c
38
+++ b/block/coroutines.h
25
@@ -XXX,XX +XXX,XX @@ static bool throttle_compute_timer(ThrottleState *ts,
39
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs,
26
void throttle_timers_attach_aio_context(ThrottleTimers *tt,
40
BdrvCheckResult *res, BdrvCheckMode fix);
27
AioContext *new_context)
41
int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
42
43
-int coroutine_fn
44
-bdrv_co_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
45
- bool is_write, BdrvRequestFlags flags);
46
int generated_co_wrapper
47
-bdrv_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
48
- bool is_write, BdrvRequestFlags flags);
49
+bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
50
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
51
+int generated_co_wrapper
52
+bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
53
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
54
55
int coroutine_fn
56
bdrv_co_common_block_status_above(BlockDriverState *bs,
57
diff --git a/include/block/block.h b/include/block/block.h
58
index XXXXXXX..XXXXXXX 100644
59
--- a/include/block/block.h
60
+++ b/include/block/block.h
61
@@ -XXX,XX +XXX,XX @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
62
int bytes, BdrvRequestFlags flags);
63
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
64
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes);
65
-int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
66
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes);
67
-int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
68
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
69
const void *buf, int count);
70
/*
71
diff --git a/block/io.c b/block/io.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/block/io.c
74
+++ b/block/io.c
75
@@ -XXX,XX +XXX,XX @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
76
return 0;
77
}
78
79
-int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
80
- QEMUIOVector *qiov, bool is_write,
81
- BdrvRequestFlags flags)
82
-{
83
- if (is_write) {
84
- return bdrv_co_pwritev(child, offset, qiov->size, qiov, flags);
85
- } else {
86
- return bdrv_co_preadv(child, offset, qiov->size, qiov, flags);
87
- }
88
-}
89
-
90
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
91
int bytes, BdrvRequestFlags flags)
28
{
92
{
29
- tt->timers[THROTTLE_READ] =
93
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
30
- aio_timer_new(new_context, tt->clock_type, SCALE_NS,
94
-
31
- tt->timer_cb[THROTTLE_READ], tt->timer_opaque);
95
- return bdrv_prwv(child, offset, &qiov, true, BDRV_REQ_ZERO_WRITE | flags);
32
- tt->timers[THROTTLE_WRITE] =
96
+ return bdrv_pwritev(child, offset, bytes, NULL,
33
- aio_timer_new(new_context, tt->clock_type, SCALE_NS,
97
+ BDRV_REQ_ZERO_WRITE | flags);
34
- tt->timer_cb[THROTTLE_WRITE], tt->timer_opaque);
35
+ ThrottleDirection dir;
36
+
37
+ for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
38
+ if (tt->timer_cb[dir]) {
39
+ tt->timers[dir] =
40
+ aio_timer_new(new_context, tt->clock_type, SCALE_NS,
41
+ tt->timer_cb[dir], tt->timer_opaque);
42
+ }
43
+ }
44
}
98
}
45
99
46
/*
100
/*
47
@@ -XXX,XX +XXX,XX @@ void throttle_timers_init(ThrottleTimers *tt,
101
@@ -XXX,XX +XXX,XX @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
48
QEMUTimerCB *write_timer_cb,
49
void *timer_opaque)
50
{
51
+ assert(read_timer_cb || write_timer_cb);
52
memset(tt, 0, sizeof(ThrottleTimers));
53
54
tt->clock_type = clock_type;
55
@@ -XXX,XX +XXX,XX @@ void throttle_timers_init(ThrottleTimers *tt,
56
/* destroy a timer */
57
static void throttle_timer_destroy(QEMUTimer **timer)
58
{
59
- assert(*timer != NULL);
60
+ if (*timer == NULL) {
61
+ return;
62
+ }
63
64
timer_free(*timer);
65
*timer = NULL;
66
@@ -XXX,XX +XXX,XX @@ static void throttle_timer_destroy(QEMUTimer **timer)
67
/* Remove timers from event loop */
68
void throttle_timers_detach_aio_context(ThrottleTimers *tt)
69
{
70
- int i;
71
+ ThrottleDirection dir;
72
73
- for (i = 0; i < THROTTLE_MAX; i++) {
74
- throttle_timer_destroy(&tt->timers[i]);
75
+ for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
76
+ throttle_timer_destroy(&tt->timers[dir]);
77
}
102
}
78
}
103
}
79
104
80
@@ -XXX,XX +XXX,XX @@ void throttle_timers_destroy(ThrottleTimers *tt)
105
-/* return < 0 if error. See bdrv_pwrite() for the return codes */
81
/* is any throttling timer configured */
106
-int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
82
bool throttle_timers_are_initialized(ThrottleTimers *tt)
107
-{
108
- int ret;
109
-
110
- ret = bdrv_prwv(child, offset, qiov, false, 0);
111
- if (ret < 0) {
112
- return ret;
113
- }
114
-
115
- return qiov->size;
116
-}
117
-
118
/* See bdrv_pwrite() for the return codes */
119
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
83
{
120
{
84
- if (tt->timers[0]) {
121
+ int ret;
85
- return true;
122
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
86
+ ThrottleDirection dir;
123
124
if (bytes < 0) {
125
return -EINVAL;
126
}
127
128
- return bdrv_preadv(child, offset, &qiov);
129
-}
130
+ ret = bdrv_preadv(child, offset, bytes, &qiov, 0);
131
132
-int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
133
-{
134
- int ret;
135
-
136
- ret = bdrv_prwv(child, offset, qiov, true, 0);
137
- if (ret < 0) {
138
- return ret;
139
- }
140
-
141
- return qiov->size;
142
+ return ret < 0 ? ret : bytes;
143
}
144
145
/* Return no. of bytes on success or < 0 on error. Important errors are:
146
@@ -XXX,XX +XXX,XX @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
147
*/
148
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
149
{
150
+ int ret;
151
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
152
153
if (bytes < 0) {
154
return -EINVAL;
155
}
156
157
- return bdrv_pwritev(child, offset, &qiov);
158
+ ret = bdrv_pwritev(child, offset, bytes, &qiov, 0);
87
+
159
+
88
+ for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
160
+ return ret < 0 ? ret : bytes;
89
+ if (tt->timers[dir]) {
90
+ return true;
91
+ }
92
}
93
94
return false;
95
@@ -XXX,XX +XXX,XX @@ bool throttle_schedule_timer(ThrottleState *ts,
96
{
97
int64_t now = qemu_clock_get_ns(tt->clock_type);
98
int64_t next_timestamp;
99
+ QEMUTimer *timer;
100
bool must_wait;
101
102
+ timer = is_write ? tt->timers[THROTTLE_WRITE] : tt->timers[THROTTLE_READ];
103
+ assert(timer);
104
+
105
must_wait = throttle_compute_timer(ts,
106
is_write,
107
now,
108
@@ -XXX,XX +XXX,XX @@ bool throttle_schedule_timer(ThrottleState *ts,
109
}
110
111
/* request throttled and timer pending -> do nothing */
112
- if (timer_pending(tt->timers[is_write])) {
113
+ if (timer_pending(timer)) {
114
return true;
115
}
116
117
/* request throttled and timer not pending -> arm timer */
118
- timer_mod(tt->timers[is_write], next_timestamp);
119
+ timer_mod(timer, next_timestamp);
120
return true;
121
}
161
}
122
162
163
/*
164
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
165
index XXXXXXX..XXXXXXX 100644
166
--- a/tests/test-bdrv-drain.c
167
+++ b/tests/test-bdrv-drain.c
168
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
169
}
170
s->io_co = NULL;
171
172
- ret = bdrv_preadv(bs->backing, offset, qiov);
173
+ ret = bdrv_co_preadv(bs->backing, offset, bytes, qiov, 0);
174
s->has_read = true;
175
176
/* Wake up drain_co if it runs */
123
--
177
--
124
2.41.0
178
2.26.2
179
diff view generated by jsdifflib
1
From: zhenwei pi <pizhenwei@bytedance.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
The 'bool is_write' style is obsolete in the throttle framework; adapt
3
Like for read/write in a previous commit, drop extra indirection layer,
4
fsdev to the new style.
4
generate directly bdrv_readv_vmstate() and bdrv_writev_vmstate().
5
5
6
Cc: Greg Kurz <groug@kaod.org>
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
8
Reviewed-by: Eric Blake <eblake@redhat.com>
9
Message-Id: <20230728022006.1098509-9-pizhenwei@bytedance.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Reviewed-by: Greg Kurz <groug@kaod.org>
10
Message-Id: <20200924185414.28642-8-vsementsov@virtuozzo.com>
11
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
12
---
11
---
13
fsdev/qemu-fsdev-throttle.h | 4 ++--
12
block/coroutines.h | 10 +++----
14
fsdev/qemu-fsdev-throttle.c | 14 +++++++-------
13
include/block/block.h | 6 ++--
15
hw/9pfs/cofile.c | 4 ++--
14
block/io.c | 70 ++++++++++++++++++++++---------------------
16
3 files changed, 11 insertions(+), 11 deletions(-)
15
3 files changed, 44 insertions(+), 42 deletions(-)
17
16
18
diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h
17
diff --git a/block/coroutines.h b/block/coroutines.h
19
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
20
--- a/fsdev/qemu-fsdev-throttle.h
19
--- a/block/coroutines.h
21
+++ b/fsdev/qemu-fsdev-throttle.h
20
+++ b/block/coroutines.h
22
@@ -XXX,XX +XXX,XX @@ typedef struct FsThrottle {
21
@@ -XXX,XX +XXX,XX @@ bdrv_common_block_status_above(BlockDriverState *bs,
23
ThrottleState ts;
22
int64_t *map,
24
ThrottleTimers tt;
23
BlockDriverState **file);
25
ThrottleConfig cfg;
24
26
- CoQueue throttled_reqs[2];
25
-int coroutine_fn
27
+ CoQueue throttled_reqs[THROTTLE_MAX];
26
-bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
28
} FsThrottle;
27
- bool is_read);
29
28
-int generated_co_wrapper
30
int fsdev_throttle_parse_opts(QemuOpts *, FsThrottle *, Error **);
29
-bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
31
30
- bool is_read);
32
void fsdev_throttle_init(FsThrottle *);
31
+int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
33
32
+ QEMUIOVector *qiov, int64_t pos);
34
-void coroutine_fn fsdev_co_throttle_request(FsThrottle *, bool ,
33
+int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
35
+void coroutine_fn fsdev_co_throttle_request(FsThrottle *, ThrottleDirection ,
34
+ QEMUIOVector *qiov, int64_t pos);
36
struct iovec *, int);
35
37
36
#endif /* BLOCK_COROUTINES_INT_H */
38
void fsdev_throttle_cleanup(FsThrottle *);
37
diff --git a/include/block/block.h b/include/block/block.h
39
diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c
40
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
41
--- a/fsdev/qemu-fsdev-throttle.c
39
--- a/include/block/block.h
42
+++ b/fsdev/qemu-fsdev-throttle.c
40
+++ b/include/block/block.h
43
@@ -XXX,XX +XXX,XX @@ void fsdev_throttle_init(FsThrottle *fst)
41
@@ -XXX,XX +XXX,XX @@ int path_has_protocol(const char *path);
42
int path_is_absolute(const char *path);
43
char *path_combine(const char *base_path, const char *filename);
44
45
-int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
46
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
47
+int generated_co_wrapper
48
+bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
49
+int generated_co_wrapper
50
+bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
51
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
52
int64_t pos, int size);
53
54
diff --git a/block/io.c b/block/io.c
55
index XXXXXXX..XXXXXXX 100644
56
--- a/block/io.c
57
+++ b/block/io.c
58
@@ -XXX,XX +XXX,XX @@ int bdrv_is_allocated_above(BlockDriverState *top,
59
}
60
61
int coroutine_fn
62
-bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
63
- bool is_read)
64
+bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
65
{
66
BlockDriver *drv = bs->drv;
67
BlockDriverState *child_bs = bdrv_primary_bs(bs);
68
int ret = -ENOTSUP;
69
70
+ if (!drv) {
71
+ return -ENOMEDIUM;
72
+ }
73
+
74
bdrv_inc_in_flight(bs);
75
76
+ if (drv->bdrv_load_vmstate) {
77
+ ret = drv->bdrv_load_vmstate(bs, qiov, pos);
78
+ } else if (child_bs) {
79
+ ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
80
+ }
81
+
82
+ bdrv_dec_in_flight(bs);
83
+
84
+ return ret;
85
+}
86
+
87
+int coroutine_fn
88
+bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
89
+{
90
+ BlockDriver *drv = bs->drv;
91
+ BlockDriverState *child_bs = bdrv_primary_bs(bs);
92
+ int ret = -ENOTSUP;
93
+
94
if (!drv) {
95
- ret = -ENOMEDIUM;
96
- } else if (drv->bdrv_load_vmstate) {
97
- if (is_read) {
98
- ret = drv->bdrv_load_vmstate(bs, qiov, pos);
99
- } else {
100
- ret = drv->bdrv_save_vmstate(bs, qiov, pos);
101
- }
102
+ return -ENOMEDIUM;
103
+ }
104
+
105
+ bdrv_inc_in_flight(bs);
106
+
107
+ if (drv->bdrv_save_vmstate) {
108
+ ret = drv->bdrv_save_vmstate(bs, qiov, pos);
109
} else if (child_bs) {
110
- ret = bdrv_co_rw_vmstate(child_bs, qiov, pos, is_read);
111
+ ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
44
}
112
}
113
114
bdrv_dec_in_flight(bs);
115
+
116
return ret;
45
}
117
}
46
118
47
-void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write,
119
@@ -XXX,XX +XXX,XX @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
48
+void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst,
120
int64_t pos, int size)
49
+ ThrottleDirection direction,
50
struct iovec *iov, int iovcnt)
51
{
121
{
52
- ThrottleDirection direction = is_write ? THROTTLE_WRITE : THROTTLE_READ;
122
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
123
- int ret;
124
+ int ret = bdrv_writev_vmstate(bs, &qiov, pos);
125
126
- ret = bdrv_writev_vmstate(bs, &qiov, pos);
127
- if (ret < 0) {
128
- return ret;
129
- }
53
-
130
-
54
+ assert(direction < THROTTLE_MAX);
131
- return size;
55
if (throttle_enabled(&fst->cfg)) {
132
-}
56
if (throttle_schedule_timer(&fst->ts, &fst->tt, direction) ||
133
-
57
- !qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
134
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
58
- qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL);
135
-{
59
+ !qemu_co_queue_empty(&fst->throttled_reqs[direction])) {
136
- return bdrv_rw_vmstate(bs, qiov, pos, false);
60
+ qemu_co_queue_wait(&fst->throttled_reqs[direction], NULL);
137
+ return ret < 0 ? ret : size;
61
}
62
63
throttle_account(&fst->ts, direction, iov_size(iov, iovcnt));
64
65
- if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) &&
66
+ if (!qemu_co_queue_empty(&fst->throttled_reqs[direction]) &&
67
!throttle_schedule_timer(&fst->ts, &fst->tt, direction)) {
68
- qemu_co_queue_next(&fst->throttled_reqs[is_write]);
69
+ qemu_co_queue_next(&fst->throttled_reqs[direction]);
70
}
71
}
72
}
138
}
73
diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
139
74
index XXXXXXX..XXXXXXX 100644
140
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
75
--- a/hw/9pfs/cofile.c
141
int64_t pos, int size)
76
+++ b/hw/9pfs/cofile.c
142
{
77
@@ -XXX,XX +XXX,XX @@ int coroutine_fn v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp,
143
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
78
if (v9fs_request_cancelled(pdu)) {
144
- int ret;
79
return -EINTR;
145
+ int ret = bdrv_readv_vmstate(bs, &qiov, pos);
80
}
146
81
- fsdev_co_throttle_request(s->ctx.fst, true, iov, iovcnt);
147
- ret = bdrv_readv_vmstate(bs, &qiov, pos);
82
+ fsdev_co_throttle_request(s->ctx.fst, THROTTLE_WRITE, iov, iovcnt);
148
- if (ret < 0) {
83
v9fs_co_run_in_worker(
149
- return ret;
84
{
150
- }
85
err = s->ops->pwritev(&s->ctx, &fidp->fs, iov, iovcnt, offset);
151
-
86
@@ -XXX,XX +XXX,XX @@ int coroutine_fn v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp,
152
- return size;
87
if (v9fs_request_cancelled(pdu)) {
153
-}
88
return -EINTR;
154
-
89
}
155
-int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
90
- fsdev_co_throttle_request(s->ctx.fst, false, iov, iovcnt);
156
-{
91
+ fsdev_co_throttle_request(s->ctx.fst, THROTTLE_READ, iov, iovcnt);
157
- return bdrv_rw_vmstate(bs, qiov, pos, true);
92
v9fs_co_run_in_worker(
158
+ return ret < 0 ? ret : size;
93
{
159
}
94
err = s->ops->preadv(&s->ctx, &fidp->fs, iov, iovcnt, offset);
160
161
/**************************************************************/
95
--
162
--
96
2.41.0
163
2.26.2
164
diff view generated by jsdifflib
1
From: zhenwei pi <pizhenwei@bytedance.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
The first dimension of both to_check and
3
This is the only non-ASCII character in the file and it isn't really
4
bucket_types_size/bucket_types_units is used as throttle direction,
4
needed here. Let's use normal "'" symbol for consistency with the rest
5
use THROTTLE_MAX instead of hard coded number. Also use ARRAY_SIZE()
5
11 occurrences of "'" in the file.
6
to avoid hard coded number for the second dimension.
7
6
8
Hanna noticed that the two arrays should be static. Yes, turn them
7
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
into static variables.
8
Reviewed-by: Eric Blake <eblake@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
include/block/block.h | 2 +-
12
1 file changed, 1 insertion(+), 1 deletion(-)
10
13
11
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
14
diff --git a/include/block/block.h b/include/block/block.h
12
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
15
index XXXXXXX..XXXXXXX 100644
13
Message-Id: <20230728022006.1098509-8-pizhenwei@bytedance.com>
16
--- a/include/block/block.h
14
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
17
+++ b/include/block/block.h
15
---
18
@@ -XXX,XX +XXX,XX @@ enum BdrvChildRoleBits {
16
util/throttle.c | 11 ++++++-----
19
BDRV_CHILD_FILTERED = (1 << 2),
17
1 file changed, 6 insertions(+), 5 deletions(-)
20
21
/*
22
- * Child from which to read all data that isn’t allocated in the
23
+ * Child from which to read all data that isn't allocated in the
24
* parent (i.e., the backing child); such data is copied to the
25
* parent through COW (and optionally COR).
26
* This field is mutually exclusive with DATA, METADATA, and
27
--
28
2.26.2
18
29
19
diff --git a/util/throttle.c b/util/throttle.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/util/throttle.c
22
+++ b/util/throttle.c
23
@@ -XXX,XX +XXX,XX @@ int64_t throttle_compute_wait(LeakyBucket *bkt)
24
static int64_t throttle_compute_wait_for(ThrottleState *ts,
25
ThrottleDirection direction)
26
{
27
- BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
28
+ static const BucketType to_check[THROTTLE_MAX][4] = {
29
+ {THROTTLE_BPS_TOTAL,
30
THROTTLE_OPS_TOTAL,
31
THROTTLE_BPS_READ,
32
THROTTLE_OPS_READ},
33
@@ -XXX,XX +XXX,XX @@ static int64_t throttle_compute_wait_for(ThrottleState *ts,
34
int64_t wait, max_wait = 0;
35
int i;
36
37
- for (i = 0; i < 4; i++) {
38
+ for (i = 0; i < ARRAY_SIZE(to_check[THROTTLE_READ]); i++) {
39
BucketType index = to_check[direction][i];
40
wait = throttle_compute_wait(&ts->cfg.buckets[index]);
41
if (wait > max_wait) {
42
@@ -XXX,XX +XXX,XX @@ bool throttle_schedule_timer(ThrottleState *ts,
43
void throttle_account(ThrottleState *ts, ThrottleDirection direction,
44
uint64_t size)
45
{
46
- const BucketType bucket_types_size[2][2] = {
47
+ static const BucketType bucket_types_size[THROTTLE_MAX][2] = {
48
{ THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ },
49
{ THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE }
50
};
51
- const BucketType bucket_types_units[2][2] = {
52
+ static const BucketType bucket_types_units[THROTTLE_MAX][2] = {
53
{ THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ },
54
{ THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE }
55
};
56
@@ -XXX,XX +XXX,XX @@ void throttle_account(ThrottleState *ts, ThrottleDirection direction,
57
units = (double) size / ts->cfg.op_size;
58
}
59
60
- for (i = 0; i < 2; i++) {
61
+ for (i = 0; i < ARRAY_SIZE(bucket_types_size[THROTTLE_READ]); i++) {
62
LeakyBucket *bkt;
63
64
bkt = &ts->cfg.buckets[bucket_types_size[direction][i]];
65
--
66
2.41.0
diff view generated by jsdifflib
1
From: zhenwei pi <pizhenwei@bytedance.com>
1
From: Stefano Garzarella <sgarzare@redhat.com>
2
2
3
Operations on a cryptodev are considered as *write* only, the callback
3
When we added io_uring AIO engine, we forgot to update qemu-options.hx,
4
of read direction is never invoked. Use NULL instead of an unreachable
4
so qemu(1) man page and qemu help were outdated.
5
path(cryptodev_backend_throttle_timer_cb on read direction).
6
5
7
The dummy read timer(never invoked) is already removed here, it means
6
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
8
that the 'FIXME' tag is no longer needed.
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Reviewed-by: Julia Suvorova <jusual@redhat.com>
9
Reviewed-by: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
10
Message-Id: <20200924151511.131471-1-sgarzare@redhat.com>
11
---
12
qemu-options.hx | 10 ++++++----
13
1 file changed, 6 insertions(+), 4 deletions(-)
9
14
10
Reviewed-by: Alberto Garcia <berto@igalia.com>
15
diff --git a/qemu-options.hx b/qemu-options.hx
11
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
16
index XXXXXXX..XXXXXXX 100644
12
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
17
--- a/qemu-options.hx
13
Message-Id: <20230728022006.1098509-6-pizhenwei@bytedance.com>
18
+++ b/qemu-options.hx
14
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
19
@@ -XXX,XX +XXX,XX @@ SRST
15
---
20
The path to the image file in the local filesystem
16
backends/cryptodev.c | 3 +--
21
17
1 file changed, 1 insertion(+), 2 deletions(-)
22
``aio``
23
- Specifies the AIO backend (threads/native, default: threads)
24
+ Specifies the AIO backend (threads/native/io_uring,
25
+ default: threads)
26
27
``locking``
28
Specifies whether the image file is protected with Linux OFD
29
@@ -XXX,XX +XXX,XX @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
30
"-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
31
" [,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
32
" [,snapshot=on|off][,rerror=ignore|stop|report]\n"
33
- " [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
34
+ " [,werror=ignore|stop|report|enospc][,id=name]\n"
35
+ " [,aio=threads|native|io_uring]\n"
36
" [,readonly=on|off][,copy-on-read=on|off]\n"
37
" [,discard=ignore|unmap][,detect-zeroes=on|off|unmap]\n"
38
" [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
39
@@ -XXX,XX +XXX,XX @@ SRST
40
The default mode is ``cache=writeback``.
41
42
``aio=aio``
43
- aio is "threads", or "native" and selects between pthread based
44
- disk I/O and native Linux AIO.
45
+ aio is "threads", "native", or "io_uring" and selects between pthread
46
+ based disk I/O, native Linux AIO, or Linux io_uring API.
47
48
``format=format``
49
Specify which disk format will be used rather than detecting the
50
--
51
2.26.2
18
52
19
diff --git a/backends/cryptodev.c b/backends/cryptodev.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/backends/cryptodev.c
22
+++ b/backends/cryptodev.c
23
@@ -XXX,XX +XXX,XX @@ static void cryptodev_backend_set_throttle(CryptoDevBackend *backend, int field,
24
if (!enabled) {
25
throttle_init(&backend->ts);
26
throttle_timers_init(&backend->tt, qemu_get_aio_context(),
27
- QEMU_CLOCK_REALTIME,
28
- cryptodev_backend_throttle_timer_cb, /* FIXME */
29
+ QEMU_CLOCK_REALTIME, NULL,
30
cryptodev_backend_throttle_timer_cb, backend);
31
}
32
33
--
34
2.41.0
diff view generated by jsdifflib
1
From: zhenwei pi <pizhenwei@bytedance.com>
1
From: Eric Auger <eric.auger@redhat.com>
2
2
3
enum ThrottleDirection is already there, use ThrottleDirection instead
3
The IOVA allocator currently ignores host reserved regions.
4
of 'bool is_write' for throttle API, also modify related codes from
4
As a result some chosen IOVAs may collide with some of them,
5
block, fsdev, cryptodev and tests.
5
resulting in VFIO MAP_DMA errors later on. This happens on ARM
6
where the MSI reserved window quickly is encountered:
7
[0x8000000, 0x8100000]. since 5.4 kernel, VFIO returns the usable
8
IOVA regions. So let's enumerate them in the prospect to avoid
9
them, later on.
6
10
7
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
11
Signed-off-by: Eric Auger <eric.auger@redhat.com>
8
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
12
Message-id: 20200929085550.30926-2-eric.auger@redhat.com
9
Message-Id: <20230728022006.1098509-7-pizhenwei@bytedance.com>
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
11
---
14
---
12
include/qemu/throttle.h | 5 +++--
15
util/vfio-helpers.c | 72 +++++++++++++++++++++++++++++++++++++++++++--
13
backends/cryptodev.c | 9 +++++----
16
1 file changed, 70 insertions(+), 2 deletions(-)
14
block/throttle-groups.c | 6 ++++--
15
fsdev/qemu-fsdev-throttle.c | 8 +++++---
16
tests/unit/test-throttle.c | 4 ++--
17
util/throttle.c | 31 +++++++++++++++++--------------
18
6 files changed, 36 insertions(+), 27 deletions(-)
19
17
20
diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
18
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
21
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
22
--- a/include/qemu/throttle.h
20
--- a/util/vfio-helpers.c
23
+++ b/include/qemu/throttle.h
21
+++ b/util/vfio-helpers.c
24
@@ -XXX,XX +XXX,XX @@ void throttle_config_init(ThrottleConfig *cfg);
22
@@ -XXX,XX +XXX,XX @@ typedef struct {
25
/* usage */
23
uint64_t iova;
26
bool throttle_schedule_timer(ThrottleState *ts,
24
} IOVAMapping;
27
ThrottleTimers *tt,
25
28
- bool is_write);
26
+struct IOVARange {
29
+ ThrottleDirection direction);
27
+ uint64_t start;
30
28
+ uint64_t end;
31
-void throttle_account(ThrottleState *ts, bool is_write, uint64_t size);
29
+};
32
+void throttle_account(ThrottleState *ts, ThrottleDirection direction,
30
+
33
+ uint64_t size);
31
struct QEMUVFIOState {
34
void throttle_limits_to_config(ThrottleLimits *arg, ThrottleConfig *cfg,
32
QemuMutex lock;
35
Error **errp);
33
36
void throttle_config_to_limits(ThrottleConfig *cfg, ThrottleLimits *var);
34
@@ -XXX,XX +XXX,XX @@ struct QEMUVFIOState {
37
diff --git a/backends/cryptodev.c b/backends/cryptodev.c
35
int device;
38
index XXXXXXX..XXXXXXX 100644
36
RAMBlockNotifier ram_notifier;
39
--- a/backends/cryptodev.c
37
struct vfio_region_info config_region_info, bar_region_info[6];
40
+++ b/backends/cryptodev.c
38
+ struct IOVARange *usable_iova_ranges;
41
@@ -XXX,XX +XXX,XX @@ static void cryptodev_backend_throttle_timer_cb(void *opaque)
39
+ uint8_t nb_iova_ranges;
42
continue;
40
43
}
41
/* These fields are protected by @lock */
44
42
/* VFIO's IO virtual address space is managed by splitting into a few
45
- throttle_account(&backend->ts, true, ret);
43
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int
46
+ throttle_account(&backend->ts, THROTTLE_WRITE, ret);
44
return ret == size ? 0 : -errno;
47
cryptodev_backend_operation(backend, op_info);
45
}
48
if (throttle_enabled(&backend->tc) &&
46
49
- throttle_schedule_timer(&backend->ts, &backend->tt, true)) {
47
+static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
50
+ throttle_schedule_timer(&backend->ts, &backend->tt,
48
+{
51
+ THROTTLE_WRITE)) {
49
+ struct vfio_iommu_type1_info *info = (struct vfio_iommu_type1_info *)buf;
52
break;
50
+ struct vfio_info_cap_header *cap = (void *)buf + info->cap_offset;
53
}
51
+ struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
52
+ int i;
53
+
54
+ while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
55
+ if (!cap->next) {
56
+ return;
57
+ }
58
+ cap = (struct vfio_info_cap_header *)(buf + cap->next);
59
+ }
60
+
61
+ cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;
62
+
63
+ s->nb_iova_ranges = cap_iova_range->nr_iovas;
64
+ if (s->nb_iova_ranges > 1) {
65
+ s->usable_iova_ranges =
66
+ g_realloc(s->usable_iova_ranges,
67
+ s->nb_iova_ranges * sizeof(struct IOVARange));
68
+ }
69
+
70
+ for (i = 0; i < s->nb_iova_ranges; i++) {
71
+ s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
72
+ s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
73
+ }
74
+}
75
+
76
static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
77
Error **errp)
78
{
79
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
80
int i;
81
uint16_t pci_cmd;
82
struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
83
- struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
84
+ struct vfio_iommu_type1_info *iommu_info = NULL;
85
+ size_t iommu_info_size = sizeof(*iommu_info);
86
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
87
char *group_file = NULL;
88
89
+ s->usable_iova_ranges = NULL;
90
+
91
/* Create a new container */
92
s->container = open("/dev/vfio/vfio", O_RDWR);
93
94
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
95
goto fail;
54
}
96
}
55
@@ -XXX,XX +XXX,XX @@ int cryptodev_backend_crypto_operation(
97
56
goto do_account;
98
+ iommu_info = g_malloc0(iommu_info_size);
99
+ iommu_info->argsz = iommu_info_size;
100
+
101
/* Get additional IOMMU info */
102
- if (ioctl(s->container, VFIO_IOMMU_GET_INFO, &iommu_info)) {
103
+ if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
104
error_setg_errno(errp, errno, "Failed to get IOMMU info");
105
ret = -errno;
106
goto fail;
57
}
107
}
58
108
59
- if (throttle_schedule_timer(&backend->ts, &backend->tt, true) ||
109
+ /*
60
+ if (throttle_schedule_timer(&backend->ts, &backend->tt, THROTTLE_WRITE) ||
110
+ * if the kernel does not report usable IOVA regions, choose
61
!QTAILQ_EMPTY(&backend->opinfos)) {
111
+ * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region
62
QTAILQ_INSERT_TAIL(&backend->opinfos, op_info, next);
112
+ */
63
return 0;
113
+ s->nb_iova_ranges = 1;
64
@@ -XXX,XX +XXX,XX @@ do_account:
114
+ s->usable_iova_ranges = g_new0(struct IOVARange, 1);
65
return ret;
115
+ s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
116
+ s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;
117
+
118
+ if (iommu_info->argsz > iommu_info_size) {
119
+ iommu_info_size = iommu_info->argsz;
120
+ iommu_info = g_realloc(iommu_info, iommu_info_size);
121
+ if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
122
+ ret = -errno;
123
+ goto fail;
124
+ }
125
+ collect_usable_iova_ranges(s, iommu_info);
126
+ }
127
+
128
s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);
129
130
if (s->device < 0) {
131
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
132
if (ret) {
133
goto fail;
66
}
134
}
67
135
+ g_free(iommu_info);
68
- throttle_account(&backend->ts, true, ret);
136
return 0;
69
+ throttle_account(&backend->ts, THROTTLE_WRITE, ret);
137
fail:
70
138
+ g_free(s->usable_iova_ranges);
71
return cryptodev_backend_operation(backend, op_info);
139
+ s->usable_iova_ranges = NULL;
72
}
140
+ s->nb_iova_ranges = 0;
73
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
141
+ g_free(iommu_info);
74
index XXXXXXX..XXXXXXX 100644
142
close(s->group);
75
--- a/block/throttle-groups.c
143
fail_container:
76
+++ b/block/throttle-groups.c
144
close(s->container);
77
@@ -XXX,XX +XXX,XX @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
145
@@ -XXX,XX +XXX,XX @@ void qemu_vfio_close(QEMUVFIOState *s)
78
ThrottleState *ts = tgm->throttle_state;
146
qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
79
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
80
ThrottleTimers *tt = &tgm->throttle_timers;
81
+ ThrottleDirection direction = is_write ? THROTTLE_WRITE : THROTTLE_READ;
82
bool must_wait;
83
84
if (qatomic_read(&tgm->io_limits_disabled)) {
85
@@ -XXX,XX +XXX,XX @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
86
return true;
87
}
147
}
88
148
ram_block_notifier_remove(&s->ram_notifier);
89
- must_wait = throttle_schedule_timer(ts, tt, is_write);
149
+ g_free(s->usable_iova_ranges);
90
+ must_wait = throttle_schedule_timer(ts, tt, direction);
150
+ s->nb_iova_ranges = 0;
91
151
qemu_vfio_reset(s);
92
/* If a timer just got armed, set tgm as the current token */
152
close(s->device);
93
if (must_wait) {
153
close(s->group);
94
@@ -XXX,XX +XXX,XX @@ void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm
95
bool must_wait;
96
ThrottleGroupMember *token;
97
ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts);
98
+ ThrottleDirection direction = is_write ? THROTTLE_WRITE : THROTTLE_READ;
99
100
assert(bytes >= 0);
101
102
@@ -XXX,XX +XXX,XX @@ void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm
103
}
104
105
/* The I/O will be executed, so do the accounting */
106
- throttle_account(tgm->throttle_state, is_write, bytes);
107
+ throttle_account(tgm->throttle_state, direction, bytes);
108
109
/* Schedule the next request */
110
schedule_next_request(tgm, is_write);
111
diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c
112
index XXXXXXX..XXXXXXX 100644
113
--- a/fsdev/qemu-fsdev-throttle.c
114
+++ b/fsdev/qemu-fsdev-throttle.c
115
@@ -XXX,XX +XXX,XX @@ void fsdev_throttle_init(FsThrottle *fst)
116
void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write,
117
struct iovec *iov, int iovcnt)
118
{
119
+ ThrottleDirection direction = is_write ? THROTTLE_WRITE : THROTTLE_READ;
120
+
121
if (throttle_enabled(&fst->cfg)) {
122
- if (throttle_schedule_timer(&fst->ts, &fst->tt, is_write) ||
123
+ if (throttle_schedule_timer(&fst->ts, &fst->tt, direction) ||
124
!qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
125
qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL);
126
}
127
128
- throttle_account(&fst->ts, is_write, iov_size(iov, iovcnt));
129
+ throttle_account(&fst->ts, direction, iov_size(iov, iovcnt));
130
131
if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) &&
132
- !throttle_schedule_timer(&fst->ts, &fst->tt, is_write)) {
133
+ !throttle_schedule_timer(&fst->ts, &fst->tt, direction)) {
134
qemu_co_queue_next(&fst->throttled_reqs[is_write]);
135
}
136
}
137
diff --git a/tests/unit/test-throttle.c b/tests/unit/test-throttle.c
138
index XXXXXXX..XXXXXXX 100644
139
--- a/tests/unit/test-throttle.c
140
+++ b/tests/unit/test-throttle.c
141
@@ -XXX,XX +XXX,XX @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
142
throttle_config(&ts, QEMU_CLOCK_VIRTUAL, &cfg);
143
144
/* account a read */
145
- throttle_account(&ts, false, size);
146
+ throttle_account(&ts, THROTTLE_READ, size);
147
/* account a write */
148
- throttle_account(&ts, true, size);
149
+ throttle_account(&ts, THROTTLE_WRITE, size);
150
151
/* check total result */
152
index = to_test[is_ops][0];
153
diff --git a/util/throttle.c b/util/throttle.c
154
index XXXXXXX..XXXXXXX 100644
155
--- a/util/throttle.c
156
+++ b/util/throttle.c
157
@@ -XXX,XX +XXX,XX @@ int64_t throttle_compute_wait(LeakyBucket *bkt)
158
159
/* This function compute the time that must be waited while this IO
160
*
161
- * @is_write: true if the current IO is a write, false if it's a read
162
+ * @direction: throttle direction
163
* @ret: time to wait
164
*/
165
static int64_t throttle_compute_wait_for(ThrottleState *ts,
166
- bool is_write)
167
+ ThrottleDirection direction)
168
{
169
BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
170
THROTTLE_OPS_TOTAL,
171
@@ -XXX,XX +XXX,XX @@ static int64_t throttle_compute_wait_for(ThrottleState *ts,
172
int i;
173
174
for (i = 0; i < 4; i++) {
175
- BucketType index = to_check[is_write][i];
176
+ BucketType index = to_check[direction][i];
177
wait = throttle_compute_wait(&ts->cfg.buckets[index]);
178
if (wait > max_wait) {
179
max_wait = wait;
180
@@ -XXX,XX +XXX,XX @@ static int64_t throttle_compute_wait_for(ThrottleState *ts,
181
182
/* compute the timer for this type of operation
183
*
184
- * @is_write: the type of operation
185
+ * @direction: throttle direction
186
* @now: the current clock timestamp
187
* @next_timestamp: the resulting timer
188
* @ret: true if a timer must be set
189
*/
190
static bool throttle_compute_timer(ThrottleState *ts,
191
- bool is_write,
192
+ ThrottleDirection direction,
193
int64_t now,
194
int64_t *next_timestamp)
195
{
196
@@ -XXX,XX +XXX,XX @@ static bool throttle_compute_timer(ThrottleState *ts,
197
throttle_do_leak(ts, now);
198
199
/* compute the wait time if any */
200
- wait = throttle_compute_wait_for(ts, is_write);
201
+ wait = throttle_compute_wait_for(ts, direction);
202
203
/* if the code must wait compute when the next timer should fire */
204
if (wait) {
205
@@ -XXX,XX +XXX,XX @@ void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
206
* NOTE: this function is not unit tested due to it's usage of timer_mod
207
*
208
* @tt: the timers structure
209
- * @is_write: the type of operation (read/write)
210
+ * @direction: throttle direction
211
* @ret: true if the timer has been scheduled else false
212
*/
213
bool throttle_schedule_timer(ThrottleState *ts,
214
ThrottleTimers *tt,
215
- bool is_write)
216
+ ThrottleDirection direction)
217
{
218
int64_t now = qemu_clock_get_ns(tt->clock_type);
219
int64_t next_timestamp;
220
QEMUTimer *timer;
221
bool must_wait;
222
223
- timer = is_write ? tt->timers[THROTTLE_WRITE] : tt->timers[THROTTLE_READ];
224
+ assert(direction < THROTTLE_MAX);
225
+ timer = tt->timers[direction];
226
assert(timer);
227
228
must_wait = throttle_compute_timer(ts,
229
- is_write,
230
+ direction,
231
now,
232
&next_timestamp);
233
234
@@ -XXX,XX +XXX,XX @@ bool throttle_schedule_timer(ThrottleState *ts,
235
236
/* do the accounting for this operation
237
*
238
- * @is_write: the type of operation (read/write)
239
+ * @direction: throttle direction
240
* @size: the size of the operation
241
*/
242
-void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
243
+void throttle_account(ThrottleState *ts, ThrottleDirection direction,
244
+ uint64_t size)
245
{
246
const BucketType bucket_types_size[2][2] = {
247
{ THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ },
248
@@ -XXX,XX +XXX,XX @@ void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
249
double units = 1.0;
250
unsigned i;
251
252
+ assert(direction < THROTTLE_MAX);
253
/* if cfg.op_size is defined and smaller than size we compute unit count */
254
if (ts->cfg.op_size && size > ts->cfg.op_size) {
255
units = (double) size / ts->cfg.op_size;
256
@@ -XXX,XX +XXX,XX @@ void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
257
for (i = 0; i < 2; i++) {
258
LeakyBucket *bkt;
259
260
- bkt = &ts->cfg.buckets[bucket_types_size[is_write][i]];
261
+ bkt = &ts->cfg.buckets[bucket_types_size[direction][i]];
262
bkt->level += size;
263
if (bkt->burst_length > 1) {
264
bkt->burst_level += size;
265
}
266
267
- bkt = &ts->cfg.buckets[bucket_types_units[is_write][i]];
268
+ bkt = &ts->cfg.buckets[bucket_types_units[direction][i]];
269
bkt->level += units;
270
if (bkt->burst_length > 1) {
271
bkt->burst_level += units;
272
--
154
--
273
2.41.0
155
2.26.2
156
diff view generated by jsdifflib
1
From: zhenwei pi <pizhenwei@bytedance.com>
1
From: Eric Auger <eric.auger@redhat.com>
2
2
3
Reviewed-by: Alberto Garcia <berto@igalia.com>
3
Introduce the qemu_vfio_find_fixed/temp_iova helpers which
4
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
4
respectively allocate IOVAs from the bottom/top parts of the
5
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
5
usable IOVA range, without picking within host IOVA reserved
6
Message-Id: <20230728022006.1098509-5-pizhenwei@bytedance.com>
6
windows. The allocation remains basic: if the size is too big
7
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
7
for the remainder of the current usable IOVA range, we jump
8
to the next one, leaving a hole in the address map.
9
10
Signed-off-by: Eric Auger <eric.auger@redhat.com>
11
Message-id: 20200929085550.30926-3-eric.auger@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
13
---
9
tests/unit/test-throttle.c | 66 ++++++++++++++++++++++++++++++++++++++
14
util/vfio-helpers.c | 57 +++++++++++++++++++++++++++++++++++++++++----
10
1 file changed, 66 insertions(+)
15
1 file changed, 53 insertions(+), 4 deletions(-)
11
16
12
diff --git a/tests/unit/test-throttle.c b/tests/unit/test-throttle.c
17
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
13
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
14
--- a/tests/unit/test-throttle.c
19
--- a/util/vfio-helpers.c
15
+++ b/tests/unit/test-throttle.c
20
+++ b/util/vfio-helpers.c
16
@@ -XXX,XX +XXX,XX @@ static void test_init(void)
21
@@ -XXX,XX +XXX,XX @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
17
throttle_timers_destroy(tt);
22
return true;
18
}
23
}
19
24
20
+static void test_init_readonly(void)
25
+static int
26
+qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
21
+{
27
+{
22
+ int i;
28
+ int i;
23
+
29
+
24
+ tt = &tgm.throttle_timers;
30
+ for (i = 0; i < s->nb_iova_ranges; i++) {
31
+ if (s->usable_iova_ranges[i].end < s->low_water_mark) {
32
+ continue;
33
+ }
34
+ s->low_water_mark =
35
+ MAX(s->low_water_mark, s->usable_iova_ranges[i].start);
25
+
36
+
26
+ /* fill the structures with crap */
37
+ if (s->usable_iova_ranges[i].end - s->low_water_mark + 1 >= size ||
27
+ memset(&ts, 1, sizeof(ts));
38
+ s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) {
28
+ memset(tt, 1, sizeof(*tt));
39
+ *iova = s->low_water_mark;
29
+
40
+ s->low_water_mark += size;
30
+ /* init structures */
41
+ return 0;
31
+ throttle_init(&ts);
42
+ }
32
+ throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
33
+ read_timer_cb, NULL, &ts);
34
+
35
+ /* check initialized fields */
36
+ g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
37
+ g_assert(tt->timers[THROTTLE_READ]);
38
+ g_assert(!tt->timers[THROTTLE_WRITE]);
39
+
40
+ /* check other fields where cleared */
41
+ g_assert(!ts.previous_leak);
42
+ g_assert(!ts.cfg.op_size);
43
+ for (i = 0; i < BUCKETS_COUNT; i++) {
44
+ g_assert(!ts.cfg.buckets[i].avg);
45
+ g_assert(!ts.cfg.buckets[i].max);
46
+ g_assert(!ts.cfg.buckets[i].level);
47
+ }
43
+ }
48
+
44
+ return -ENOMEM;
49
+ throttle_timers_destroy(tt);
50
+}
45
+}
51
+
46
+
52
+static void test_init_writeonly(void)
47
+static int
48
+qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
53
+{
49
+{
54
+ int i;
50
+ int i;
55
+
51
+
56
+ tt = &tgm.throttle_timers;
52
+ for (i = s->nb_iova_ranges - 1; i >= 0; i--) {
53
+ if (s->usable_iova_ranges[i].start > s->high_water_mark) {
54
+ continue;
55
+ }
56
+ s->high_water_mark =
57
+ MIN(s->high_water_mark, s->usable_iova_ranges[i].end + 1);
57
+
58
+
58
+ /* fill the structures with crap */
59
+ if (s->high_water_mark - s->usable_iova_ranges[i].start + 1 >= size ||
59
+ memset(&ts, 1, sizeof(ts));
60
+ s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) {
60
+ memset(tt, 1, sizeof(*tt));
61
+ *iova = s->high_water_mark - size;
61
+
62
+ s->high_water_mark = *iova;
62
+ /* init structures */
63
+ return 0;
63
+ throttle_init(&ts);
64
+ }
64
+ throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
65
+ NULL, write_timer_cb, &ts);
66
+
67
+ /* check initialized fields */
68
+ g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
69
+ g_assert(!tt->timers[THROTTLE_READ]);
70
+ g_assert(tt->timers[THROTTLE_WRITE]);
71
+
72
+ /* check other fields where cleared */
73
+ g_assert(!ts.previous_leak);
74
+ g_assert(!ts.cfg.op_size);
75
+ for (i = 0; i < BUCKETS_COUNT; i++) {
76
+ g_assert(!ts.cfg.buckets[i].avg);
77
+ g_assert(!ts.cfg.buckets[i].max);
78
+ g_assert(!ts.cfg.buckets[i].level);
79
+ }
65
+ }
80
+
66
+ return -ENOMEM;
81
+ throttle_timers_destroy(tt);
82
+}
67
+}
83
+
68
+
84
static void test_destroy(void)
69
/* Map [host, host + size) area into a contiguous IOVA address space, and store
85
{
70
* the result in @iova if not NULL. The caller need to make sure the area is
86
int i;
71
* aligned to page size, and mustn't overlap with existing mapping areas (split
87
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
72
@@ -XXX,XX +XXX,XX @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
88
g_test_add_func("/throttle/leak_bucket", test_leak_bucket);
73
goto out;
89
g_test_add_func("/throttle/compute_wait", test_compute_wait);
74
}
90
g_test_add_func("/throttle/init", test_init);
75
if (!temporary) {
91
+ g_test_add_func("/throttle/init_readonly", test_init_readonly);
76
- iova0 = s->low_water_mark;
92
+ g_test_add_func("/throttle/init_writeonly", test_init_writeonly);
77
+ if (qemu_vfio_find_fixed_iova(s, size, &iova0)) {
93
g_test_add_func("/throttle/destroy", test_destroy);
78
+ ret = -ENOMEM;
94
g_test_add_func("/throttle/have_timer", test_have_timer);
79
+ goto out;
95
g_test_add_func("/throttle/detach_attach", test_detach_attach);
80
+ }
81
+
82
mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
83
if (!mapping) {
84
ret = -ENOMEM;
85
@@ -XXX,XX +XXX,XX @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
86
qemu_vfio_undo_mapping(s, mapping, NULL);
87
goto out;
88
}
89
- s->low_water_mark += size;
90
qemu_vfio_dump_mappings(s);
91
} else {
92
- iova0 = s->high_water_mark - size;
93
+ if (qemu_vfio_find_temp_iova(s, size, &iova0)) {
94
+ ret = -ENOMEM;
95
+ goto out;
96
+ }
97
ret = qemu_vfio_do_mapping(s, host, size, iova0);
98
if (ret) {
99
goto out;
100
}
101
- s->high_water_mark -= size;
102
}
103
}
104
if (iova) {
96
--
105
--
97
2.41.0
106
2.26.2
107
diff view generated by jsdifflib