The following changes since commit 812b835fb4d23dd108b2f9802158472d50b73579:

  Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2019-05-07' into staging (2019-05-09 16:31:12 +0100)

are available in the Git repository at:

  https://github.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to e84125761f78919fe63616d9888ea45e72dc956f:

  docs: add Security chapter to the documentation (2019-05-10 10:53:52 +0100)

----------------------------------------------------------------
Pull request
----------------------------------------------------------------

Andrey Shinkevich (1):
  block/io.c: fix for the allocation failure

Jules Irenge (3):
  util/readline: add a space to fix errors by checkpatch tool
  util: readline: replace tab indent by four spaces to fix checkpatch
    errors
  util/readline: Add braces to fix checkpatch errors

Nikita Alekseev (1):
  block: Add coroutine_fn to bdrv_check_co_entry

Paolo Bonzini (1):
  aio-posix: ensure poll mode is left when aio_notify is called

Stefan Hajnoczi (2):
  docs: add Secure Coding Practices to developer docs
  docs: add Security chapter to the documentation

 Makefile                               |   2 +-
 block.c                                |   2 +-
 block/io.c                             |   2 +-
 util/aio-posix.c                       |  12 +-
 util/readline.c                        | 174 ++++++++++++++-----------
 docs/devel/index.rst                   |   1 +
 docs/devel/secure-coding-practices.rst | 106 +++++++++++++++
 docs/security.texi                     | 131 +++++++++++++++++++
 qemu-doc.texi                          |   3 +
 9 files changed, 347 insertions(+), 86 deletions(-)
 create mode 100644 docs/devel/secure-coding-practices.rst
 create mode 100644 docs/security.texi

--
2.21.0
From: Jules Irenge <jbi.octave@gmail.com>

util/readline: add a space to fix errors reported by checkpatch.pl tool
"ERROR: space required before the open parenthesis"
"ERROR: space required after that ..."
within "util/readline.c" file

Signed-off-by: Jules Irenge <jbi.octave@gmail.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-id: 20190401024406.10819-2-jbi.octave@gmail.com
Message-Id: <20190401024406.10819-2-jbi.octave@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/readline.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/util/readline.c b/util/readline.c
index XXXXXXX..XXXXXXX 100644
--- a/util/readline.c
+++ b/util/readline.c
@@ -XXX,XX +XXX,XX @@ static void readline_update(ReadLineState *rs)
 
     if (rs->cmd_buf_size != rs->last_cmd_buf_size ||
         memcmp(rs->cmd_buf, rs->last_cmd_buf, rs->cmd_buf_size) != 0) {
-        for(i = 0; i < rs->last_cmd_buf_index; i++) {
+        for (i = 0; i < rs->last_cmd_buf_index; i++) {
             rs->printf_func(rs->opaque, "\033[D");
         }
         rs->cmd_buf[rs->cmd_buf_size] = '\0';
         if (rs->read_password) {
             len = strlen(rs->cmd_buf);
-            for(i = 0; i < len; i++)
+            for (i = 0; i < len; i++)
                 rs->printf_func(rs->opaque, "*");
         } else {
             rs->printf_func(rs->opaque, "%s", rs->cmd_buf);
@@ -XXX,XX +XXX,XX @@ static void readline_update(ReadLineState *rs)
     if (rs->cmd_buf_index != rs->last_cmd_buf_index) {
         delta = rs->cmd_buf_index - rs->last_cmd_buf_index;
         if (delta > 0) {
-            for(i = 0;i < delta; i++) {
+            for (i = 0; i < delta; i++) {
                 rs->printf_func(rs->opaque, "\033[C");
             }
         } else {
             delta = -delta;
-            for(i = 0;i < delta; i++) {
+            for (i = 0; i < delta; i++) {
                 rs->printf_func(rs->opaque, "\033[D");
             }
         }
@@ -XXX,XX +XXX,XX @@ static void readline_completion(ReadLineState *rs)
         return;
     if (rs->nb_completions == 1) {
         len = strlen(rs->completions[0]);
-        for(i = rs->completion_index; i < len; i++) {
+        for (i = rs->completion_index; i < len; i++) {
             readline_insert_char(rs, rs->completions[0][i]);
         }
         /* extra space for next argument. XXX: make it more generic */
@@ -XXX,XX +XXX,XX @@ static void readline_completion(ReadLineState *rs)
               completion_comp);
         rs->printf_func(rs->opaque, "\n");
         max_width = 0;
-        max_prefix = 0;    
-        for(i = 0; i < rs->nb_completions; i++) {
+        max_prefix = 0;
+        for (i = 0; i < rs->nb_completions; i++) {
             len = strlen(rs->completions[i]);
-            if (i==0) {
+            if (i == 0) {
                 max_prefix = len;
             } else {
                 if (len < max_prefix)
                     max_prefix = len;
-                for(j=0; j<max_prefix; j++) {
+                for (j = 0; j < max_prefix; j++) {
                     if (rs->completions[i][j] != rs->completions[0][j])
                         max_prefix = j;
                 }
@@ -XXX,XX +XXX,XX @@ static void readline_completion(ReadLineState *rs)
             if (len > max_width)
                 max_width = len;
         }
-        if (max_prefix > 0)    
-            for(i = rs->completion_index; i < max_prefix; i++) {
+        if (max_prefix > 0)
+            for (i = rs->completion_index; i < max_prefix; i++) {
                 readline_insert_char(rs, rs->completions[0][i]);
             }
         max_width += 2;
@@ -XXX,XX +XXX,XX @@ static void readline_completion(ReadLineState *rs)
             max_width = 80;
         nb_cols = 80 / max_width;
         j = 0;
-        for(i = 0; i < rs->nb_completions; i++) {
+        for (i = 0; i < rs->nb_completions; i++) {
             rs->printf_func(rs->opaque, "%-*s", max_width, rs->completions[i]);
             if (++j == nb_cols || i == (rs->nb_completions - 1)) {
                 rs->printf_func(rs->opaque, "\n");
@@ -XXX,XX +XXX,XX @@ static void readline_clear_screen(ReadLineState *rs)
 /* return true if command handled */
 void readline_handle_byte(ReadLineState *rs, int ch)
 {
-    switch(rs->esc_state) {
+    switch (rs->esc_state) {
     case IS_NORM:
-        switch(ch) {
+        switch (ch) {
         case 1:
             readline_bol(rs);
             break;
@@ -XXX,XX +XXX,XX @@ void readline_handle_byte(ReadLineState *rs, int ch)
         }
         break;
     case IS_CSI:
-        switch(ch) {
+        switch (ch) {
 	case 'A':
 	case 'F':
 	    readline_up_char(rs);
@@ -XXX,XX +XXX,XX @@ void readline_handle_byte(ReadLineState *rs, int ch)
             rs->esc_param = rs->esc_param * 10 + (ch - '0');
             goto the_end;
         case '~':
-            switch(rs->esc_param) {
+            switch (rs->esc_param) {
             case 1:
                 readline_bol(rs);
                 break;
@@ -XXX,XX +XXX,XX @@ void readline_handle_byte(ReadLineState *rs, int ch)
     the_end:
         break;
     case IS_SS3:
-        switch(ch) {
+        switch (ch) {
         case 'F':
             readline_eol(rs);
             break;
--
2.21.0
From: Jules Irenge <jbi.octave@gmail.com>

Replace tab indent by four spaces to fix errors issued by checkpatch.pl tool
"ERROR: code indent should never use tabs" within "util/readline.c" file.

Signed-off-by: Jules Irenge <jbi.octave@gmail.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-id: 20190401024406.10819-3-jbi.octave@gmail.com
Message-Id: <20190401024406.10819-3-jbi.octave@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/readline.c | 98 ++++++++++++++++++++++-------------------------
 1 file changed, 49 insertions(+), 49 deletions(-)

diff --git a/util/readline.c b/util/readline.c
index XXXXXXX..XXXXXXX 100644
--- a/util/readline.c
+++ b/util/readline.c
@@ -XXX,XX +XXX,XX @@ static void readline_up_char(ReadLineState *rs)
     int idx;
 
     if (rs->hist_entry == 0)
-	return;
+        return;
     if (rs->hist_entry == -1) {
-	/* Find latest entry */
-	for (idx = 0; idx < READLINE_MAX_CMDS; idx++) {
-	    if (rs->history[idx] == NULL)
-		break;
-	}
-	rs->hist_entry = idx;
+        /* Find latest entry */
+        for (idx = 0; idx < READLINE_MAX_CMDS; idx++) {
+            if (rs->history[idx] == NULL)
+                break;
+        }
+        rs->hist_entry = idx;
     }
     rs->hist_entry--;
     if (rs->hist_entry >= 0) {
-	pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf),
+        pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf),
                 rs->history[rs->hist_entry]);
-	rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf);
+        rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf);
     }
 }
@@ -XXX,XX +XXX,XX @@ static void readline_down_char(ReadLineState *rs)
         return;
     if (rs->hist_entry < READLINE_MAX_CMDS - 1 &&
         rs->history[++rs->hist_entry] != NULL) {
-	pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf),
+        pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf),
                 rs->history[rs->hist_entry]);
     } else {
         rs->cmd_buf[0] = 0;
-	rs->hist_entry = -1;
+        rs->hist_entry = -1;
     }
     rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf);
 }
@@ -XXX,XX +XXX,XX @@ static void readline_hist_add(ReadLineState *rs, const char *cmdline)
     int idx;
 
     if (cmdline[0] == '\0')
-	return;
+        return;
     new_entry = NULL;
     if (rs->hist_entry != -1) {
-	/* We were editing an existing history entry: replace it */
-	hist_entry = rs->history[rs->hist_entry];
-	idx = rs->hist_entry;
-	if (strcmp(hist_entry, cmdline) == 0) {
-	    goto same_entry;
-	}
+        /* We were editing an existing history entry: replace it */
+        hist_entry = rs->history[rs->hist_entry];
+        idx = rs->hist_entry;
+        if (strcmp(hist_entry, cmdline) == 0) {
+            goto same_entry;
+        }
     }
     /* Search cmdline in history buffers */
     for (idx = 0; idx < READLINE_MAX_CMDS; idx++) {
-	hist_entry = rs->history[idx];
-	if (hist_entry == NULL)
-	    break;
-	if (strcmp(hist_entry, cmdline) == 0) {
-	same_entry:
-	    new_entry = hist_entry;
-	    /* Put this entry at the end of history */
-	    memmove(&rs->history[idx], &rs->history[idx + 1],
-		    (READLINE_MAX_CMDS - (idx + 1)) * sizeof(char *));
-	    rs->history[READLINE_MAX_CMDS - 1] = NULL;
-	    for (; idx < READLINE_MAX_CMDS; idx++) {
-		if (rs->history[idx] == NULL)
-		    break;
-	    }
-	    break;
-	}
+        hist_entry = rs->history[idx];
+        if (hist_entry == NULL)
+            break;
+        if (strcmp(hist_entry, cmdline) == 0) {
+        same_entry:
+            new_entry = hist_entry;
+            /* Put this entry at the end of history */
+            memmove(&rs->history[idx], &rs->history[idx + 1],
+                    (READLINE_MAX_CMDS - (idx + 1)) * sizeof(char *));
+            rs->history[READLINE_MAX_CMDS - 1] = NULL;
+            for (; idx < READLINE_MAX_CMDS; idx++) {
+                if (rs->history[idx] == NULL)
+                    break;
+            }
+            break;
+        }
     }
     if (idx == READLINE_MAX_CMDS) {
-	/* Need to get one free slot */
+        /* Need to get one free slot */
         g_free(rs->history[0]);
-	memmove(rs->history, &rs->history[1],
-	        (READLINE_MAX_CMDS - 1) * sizeof(char *));
-	rs->history[READLINE_MAX_CMDS - 1] = NULL;
-	idx = READLINE_MAX_CMDS - 1;
+        memmove(rs->history, &rs->history[1],
+                (READLINE_MAX_CMDS - 1) * sizeof(char *));
+        rs->history[READLINE_MAX_CMDS - 1] = NULL;
+        idx = READLINE_MAX_CMDS - 1;
     }
     if (new_entry == NULL)
         new_entry = g_strdup(cmdline);
@@ -XXX,XX +XXX,XX @@ void readline_handle_byte(ReadLineState *rs, int ch)
         case 8:
             readline_backspace(rs);
             break;
-	case 155:
+        case 155:
             rs->esc_state = IS_CSI;
-	    break;
+            break;
         default:
             if (ch >= 32) {
                 readline_insert_char(rs, ch);
@@ -XXX,XX +XXX,XX @@ void readline_handle_byte(ReadLineState *rs, int ch)
         break;
     case IS_CSI:
         switch (ch) {
-	case 'A':
-	case 'F':
-	    readline_up_char(rs);
-	    break;
-	case 'B':
-	case 'E':
-	    readline_down_char(rs);
-	    break;
+        case 'A':
+        case 'F':
+            readline_up_char(rs);
+            break;
+        case 'B':
+        case 'E':
+            readline_down_char(rs);
+            break;
         case 'D':
             readline_backward_char(rs);
             break;
--
2.21.0
From: Jules Irenge <jbi.octave@gmail.com>

Add braces to fix errors issued by checkpatch.pl tool
"ERROR: braces {} are necessary for all arms of this statement"
Within "util/readline.c" file
Message-Id: <20190330112142.14082-1-jbi.octave@gmail.com>

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/readline.c | 50 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/util/readline.c b/util/readline.c
index XXXXXXX..XXXXXXX 100644
--- a/util/readline.c
+++ b/util/readline.c
@@ -XXX,XX +XXX,XX @@ static void readline_update(ReadLineState *rs)
         rs->cmd_buf[rs->cmd_buf_size] = '\0';
         if (rs->read_password) {
             len = strlen(rs->cmd_buf);
-            for (i = 0; i < len; i++)
+            for (i = 0; i < len; i++) {
                 rs->printf_func(rs->opaque, "*");
+            }
         } else {
             rs->printf_func(rs->opaque, "%s", rs->cmd_buf);
         }
@@ -XXX,XX +XXX,XX @@ static void readline_up_char(ReadLineState *rs)
 {
     int idx;
 
-    if (rs->hist_entry == 0)
+    if (rs->hist_entry == 0) {
         return;
+    }
     if (rs->hist_entry == -1) {
         /* Find latest entry */
         for (idx = 0; idx < READLINE_MAX_CMDS; idx++) {
-            if (rs->history[idx] == NULL)
+            if (rs->history[idx] == NULL) {
                 break;
+            }
         }
         rs->hist_entry = idx;
     }
@@ -XXX,XX +XXX,XX @@ static void readline_up_char(ReadLineState *rs)
 
 static void readline_down_char(ReadLineState *rs)
 {
-    if (rs->hist_entry == -1)
+    if (rs->hist_entry == -1) {
         return;
+    }
     if (rs->hist_entry < READLINE_MAX_CMDS - 1 &&
         rs->history[++rs->hist_entry] != NULL) {
         pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf),
@@ -XXX,XX +XXX,XX @@ static void readline_hist_add(ReadLineState *rs, const char *cmdline)
     char *hist_entry, *new_entry;
     int idx;
 
-    if (cmdline[0] == '\0')
+    if (cmdline[0] == '\0') {
         return;
+    }
     new_entry = NULL;
     if (rs->hist_entry != -1) {
         /* We were editing an existing history entry: replace it */
@@ -XXX,XX +XXX,XX @@ static void readline_hist_add(ReadLineState *rs, const char *cmdline)
     /* Search cmdline in history buffers */
     for (idx = 0; idx < READLINE_MAX_CMDS; idx++) {
         hist_entry = rs->history[idx];
-        if (hist_entry == NULL)
+        if (hist_entry == NULL) {
             break;
+        }
         if (strcmp(hist_entry, cmdline) == 0) {
         same_entry:
             new_entry = hist_entry;
@@ -XXX,XX +XXX,XX @@ static void readline_hist_add(ReadLineState *rs, const char *cmdline)
                     (READLINE_MAX_CMDS - (idx + 1)) * sizeof(char *));
             rs->history[READLINE_MAX_CMDS - 1] = NULL;
             for (; idx < READLINE_MAX_CMDS; idx++) {
-                if (rs->history[idx] == NULL)
+                if (rs->history[idx] == NULL) {
                     break;
+                }
             }
             break;
         }
@@ -XXX,XX +XXX,XX @@ static void readline_hist_add(ReadLineState *rs, const char *cmdline)
         rs->history[READLINE_MAX_CMDS - 1] = NULL;
         idx = READLINE_MAX_CMDS - 1;
     }
-    if (new_entry == NULL)
+    if (new_entry == NULL) {
         new_entry = g_strdup(cmdline);
+    }
     rs->history[idx] = new_entry;
     rs->hist_entry = -1;
 }
@@ -XXX,XX +XXX,XX @@ static void readline_completion(ReadLineState *rs)
     g_free(cmdline);
 
     /* no completion found */
-    if (rs->nb_completions <= 0)
+    if (rs->nb_completions <= 0) {
         return;
+    }
     if (rs->nb_completions == 1) {
         len = strlen(rs->completions[0]);
         for (i = rs->completion_index; i < len; i++) {
             readline_insert_char(rs, rs->completions[0][i]);
         }
         /* extra space for next argument. XXX: make it more generic */
-        if (len > 0 && rs->completions[0][len - 1] != '/')
+        if (len > 0 && rs->completions[0][len - 1] != '/') {
             readline_insert_char(rs, ' ');
+        }
     } else {
         qsort(rs->completions, rs->nb_completions, sizeof(char *),
               completion_comp);
@@ -XXX,XX +XXX,XX @@ static void readline_completion(ReadLineState *rs)
             if (i == 0) {
                 max_prefix = len;
             } else {
-                if (len < max_prefix)
+                if (len < max_prefix) {
                     max_prefix = len;
+                }
                 for (j = 0; j < max_prefix; j++) {
-                    if (rs->completions[i][j] != rs->completions[0][j])
+                    if (rs->completions[i][j] != rs->completions[0][j]) {
                         max_prefix = j;
+                    }
                 }
             }
-            if (len > max_width)
+            if (len > max_width) {
                 max_width = len;
+            }
         }
@@ -XXX,XX +XXX,XX @@ static void readline_completion(ReadLineState *rs)
         if (max_prefix > 0)
             for (i = rs->completion_index; i < max_prefix; i++) {
                 readline_insert_char(rs, rs->completions[0][i]);
             }
         max_width += 2;
-        if (max_width < 10)
+        if (max_width < 10) {
             max_width = 10;
-        else if (max_width > 80)
+        } else if (max_width > 80) {
             max_width = 80;
+        }
         nb_cols = 80 / max_width;
         j = 0;
         for (i = 0; i < rs->nb_completions; i++) {
@@ -XXX,XX +XXX,XX @@ void readline_handle_byte(ReadLineState *rs, int ch)
         case 10:
         case 13:
             rs->cmd_buf[rs->cmd_buf_size] = '\0';
-            if (!rs->read_password)
+            if (!rs->read_password) {
                 readline_hist_add(rs, rs->cmd_buf);
+            }
             rs->printf_func(rs->opaque, "\n");
             rs->cmd_buf_index = 0;
             rs->cmd_buf_size = 0;
@@ -XXX,XX +XXX,XX @@ void readline_restart(ReadLineState *rs)
 
 const char *readline_get_history(ReadLineState *rs, unsigned int index)
 {
-    if (index >= READLINE_MAX_CMDS)
+    if (index >= READLINE_MAX_CMDS) {
         return NULL;
+    }
     return rs->history[index];
 }
--
2.21.0
From: Nikita Alekseev <n.alekseev2104@gmail.com>

bdrv_check_co_entry calls bdrv_co_check, which is a coroutine function.
Thus, it also needs to be marked as a coroutine.

Signed-off-by: Nikita Alekseev <n.alekseev2104@gmail.com>
Message-id: 20190401093051.16488-1-n.alekseev2104@gmail.com
Message-Id: <20190401093051.16488-1-n.alekseev2104@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ typedef struct CheckCo {
     int ret;
 } CheckCo;
 
-static void bdrv_check_co_entry(void *opaque)
+static void coroutine_fn bdrv_check_co_entry(void *opaque)
 {
     CheckCo *cco = opaque;
     cco->ret = bdrv_co_check(cco->bs, cco->res, cco->fix);
--
2.21.0
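As background for the one-line fix above: in QEMU, a coroutine entry point is created with qemu_coroutine_create() and started with qemu_coroutine_enter(), and the coroutine_fn marker documents that the function may yield and must therefore only run in coroutine context. A minimal sketch of that pattern follows; the CheckCo field names mirror the patch, but the surrounding harness (check_co_entry, start_check) is illustrative only and not code from this series:

    #include "qemu/osdep.h"
    #include "qemu/coroutine.h"
    #include "block/block.h"

    typedef struct CheckCo {
        BlockDriverState *bs;
        BdrvCheckResult *res;
        BdrvCheckMode fix;
        int ret;
    } CheckCo;

    /* coroutine_fn: may yield, so it must only run inside a coroutine */
    static void coroutine_fn check_co_entry(void *opaque)
    {
        CheckCo *cco = opaque;
        cco->ret = bdrv_co_check(cco->bs, cco->res, cco->fix);
    }

    /* Hypothetical caller: create the coroutine, then enter it once */
    static void start_check(CheckCo *cco)
    {
        Coroutine *co = qemu_coroutine_create(check_co_entry, cco);
        qemu_coroutine_enter(co);
    }

The marker carries no runtime behavior; it records, for reviewers and analysis tooling, that calling the function directly from non-coroutine context would be a bug.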
From: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>

On a file system used by the customer, fallocate() returns an error
if the block is not properly aligned. So, bdrv_co_pwrite_zeroes()
fails. We can handle that case the same way as it is done for the
unsupported cases, namely, call to bdrv_driver_pwritev() that writes
zeroes to an image for the unaligned chunk of the block.

Suggested-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1554474244-553661-1-git-send-email-andrey.shinkevich@virtuozzo.com
Message-Id: <1554474244-553661-1-git-send-email-andrey.shinkevich@virtuozzo.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
             assert(!bs->supported_zero_flags);
         }
 
-        if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
+        if (ret < 0 && !(flags & BDRV_REQ_NO_FALLBACK)) {
             /* Fall back to bounce buffer if write zeroes is unsupported */
             BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
 
--
2.21.0
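The shape of the fallback path that this one-liner widens, as a self-contained sketch. Dev, dev_efficient_write_zeroes() and dev_pwrite() are invented stand-ins, not the block-layer internals; the real logic lives in bdrv_co_do_pwrite_zeroes() in block/io.c:

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>

    typedef struct Dev Dev;  /* opaque device handle (illustrative) */
    int dev_efficient_write_zeroes(Dev *d, int64_t off, size_t len);
    int dev_pwrite(Dev *d, int64_t off, const void *buf, size_t len);

    static int write_zeroes_with_fallback(Dev *d, int64_t off, size_t len)
    {
        /* Fast path, e.g. fallocate(); may fail for misaligned blocks */
        int ret = dev_efficient_write_zeroes(d, off, len);

        /* Previously only ret == -ENOTSUP fell back; now any error does */
        if (ret < 0) {
            void *buf = calloc(1, len);   /* bounce buffer full of zeroes */
            if (!buf) {
                return -ENOMEM;
            }
            ret = dev_pwrite(d, off, buf, len);
            free(buf);
        }
        return ret;
    }

The design point of the patch is simply that the slow path already produces correct results for every failure mode, so restricting it to -ENOTSUP was needlessly strict.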
From: Paolo Bonzini <pbonzini@redhat.com>

With aio=thread, adaptive polling makes latency worse rather than
better, because it delays the execution of the ThreadPool's
completion bottom half.

event_notifier_poll() does run while polling, detecting that
a bottom half was scheduled by a worker thread, but because
ctx->notifier is explicitly ignored in run_poll_handlers_once(),
scheduling the BH does not count as making progress and
run_poll_handlers() keeps running. Fix this by recomputing
the deadline after *timeout could have changed.

With this change, ThreadPool still cannot participate in polling
but at least it does not suffer from extra latency.

Reported-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20190409122823.12416-1-pbonzini@redhat.com
Cc: Stefan Hajnoczi <stefanha@gmail.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: qemu-block@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1553692145-86728-1-git-send-email-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20190409122823.12416-1-pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/aio-posix.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout)
         if (!node->deleted && node->io_poll &&
             aio_node_check(ctx, node->is_external) &&
             node->io_poll(node->opaque)) {
+            /*
+             * Polling was successful, exit try_poll_mode immediately
+             * to adjust the next polling time.
+             */
             *timeout = 0;
             if (node->opaque != &ctx->notifier) {
                 progress = true;
@@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
     do {
         progress = run_poll_handlers_once(ctx, timeout);
         elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
-    } while (!progress && elapsed_time < max_ns
-             && !atomic_read(&ctx->poll_disable_cnt));
+        max_ns = qemu_soonest_timeout(*timeout, max_ns);
+        assert(!(max_ns && progress));
+    } while (elapsed_time < max_ns && !atomic_read(&ctx->poll_disable_cnt));
 
     /* If time has passed with no successful polling, adjust *timeout to
      * keep the same ending time.
@@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
  */
 static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
 {
-    /* See qemu_soonest_timeout() uint64_t hack */
-    int64_t max_ns = MIN((uint64_t)*timeout, (uint64_t)ctx->poll_ns);
+    int64_t max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
 
     if (max_ns && !atomic_read(&ctx->poll_disable_cnt)) {
         poll_set_started(ctx, true);
--
2.21.0
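The control flow being fixed, reduced to a self-contained sketch. now_ns(), poll_once() and the negative-means-"no deadline" convention are stand-ins for the aio-posix internals, not the actual QEMU code:

    #include <stdbool.h>
    #include <stdint.h>

    int64_t now_ns(void);              /* monotonic clock, assumed */
    bool poll_once(int64_t *timeout);  /* may shrink *timeout to 0 */

    /* Earlier of two deadlines, where a negative value means "none" */
    static int64_t soonest(int64_t a, int64_t b)
    {
        return a < 0 ? b : (b < 0 || a < b) ? a : b;
    }

    static bool poll_loop(int64_t max_ns, int64_t *timeout)
    {
        int64_t start = now_ns();
        bool progress;

        do {
            progress = poll_once(timeout);
            /* Recompute the deadline each iteration: if a handler zeroed
             * *timeout (e.g. a bottom half was scheduled by a worker
             * thread), max_ns drops to 0 and the loop exits immediately
             * instead of spinning out the rest of the polling window. */
            max_ns = soonest(*timeout, max_ns);
        } while (now_ns() - start < max_ns);

        return progress;
    }

Before the fix, the exit condition depended only on progress and the original max_ns, so a BH scheduled via ctx->notifier (which deliberately does not count as progress) left the loop spinning until the full polling window elapsed.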
At KVM Forum 2018 I gave a presentation on security in QEMU:
https://www.youtube.com/watch?v=YAdRf_hwxU8 (video)
https://vmsplice.net/~stefan/stefanha-kvm-forum-2018.pdf (slides)

This patch adds a guide to secure coding practices. This document
covers things that developers should know about security in QEMU. It is
just a starting point that we can expand on later. I hope it will be
useful as a resource for new contributors and will save code reviewers
from explaining the same concepts many times.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Li Qiang <liq3ea@gmail.com>
Message-id: 20190509121820.16294-2-stefanha@redhat.com
Message-Id: <20190509121820.16294-2-stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 docs/devel/index.rst                   |   1 +
 docs/devel/secure-coding-practices.rst | 106 +++++++++++++++++++++++++
 2 files changed, 107 insertions(+)
 create mode 100644 docs/devel/secure-coding-practices.rst

diff --git a/docs/devel/index.rst b/docs/devel/index.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/devel/index.rst
+++ b/docs/devel/index.rst
@@ -XXX,XX +XXX,XX @@ Contents:
    stable-process
    testing
    decodetree
+   secure-coding-practices
diff --git a/docs/devel/secure-coding-practices.rst b/docs/devel/secure-coding-practices.rst
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/docs/devel/secure-coding-practices.rst
@@ -XXX,XX +XXX,XX @@
+=======================
+Secure Coding Practices
+=======================
+This document covers topics that both developers and security researchers must
+be aware of so that they can develop safe code and audit existing code
+properly.
+
+Reporting Security Bugs
+-----------------------
+For details on how to report security bugs or ask questions about potential
+security bugs, see the `Security Process wiki page
+<https://wiki.qemu.org/SecurityProcess>`_.
+
+General Secure C Coding Practices
+---------------------------------
+Most CVEs (security bugs) reported against QEMU are not specific to
+virtualization or emulation. They are simply C programming bugs. Therefore
+it's critical to be aware of common classes of security bugs.
+
+There is a wide selection of resources available covering secure C coding. For
+example, the `CERT C Coding Standard
+<https://wiki.sei.cmu.edu/confluence/display/c/SEI+CERT+C+Coding+Standard>`_
+covers the most important classes of security bugs.
+
+Instead of describing them in detail here, only the names of the most important
+classes of security bugs are mentioned:
+
+* Buffer overflows
+* Use-after-free and double-free
+* Integer overflows
+* Format string vulnerabilities
+
+Some of these classes of bugs can be detected by analyzers. Static analysis is
+performed regularly by Coverity and the most obvious of these bugs are even
+reported by compilers. Dynamic analysis is possible with valgrind, tsan, and
+asan.
+
+Input Validation
+----------------
+Inputs from the guest or external sources (e.g. network, files) cannot be
+trusted and may be invalid. Inputs must be checked before using them in a way
+that could crash the program, expose host memory to the guest, or otherwise be
+exploitable by an attacker.
+
+The most sensitive attack surface is device emulation. All hardware register
+accesses and data read from guest memory must be validated. A typical example
+is a device that contains multiple units that are selectable by the guest via
+an index register::
+
+  typedef struct {
+      ProcessingUnit unit[2];
+      ...
+  } MyDeviceState;
+
+  static void mydev_writel(void *opaque, uint32_t addr, uint32_t val)
+  {
+      MyDeviceState *mydev = opaque;
+      ProcessingUnit *unit;
+
+      switch (addr) {
+      case MYDEV_SELECT_UNIT:
+          unit = &mydev->unit[val];   <-- this input wasn't validated!
+          ...
+      }
+  }
+
+If ``val`` is not in range [0, 1] then an out-of-bounds memory access will take
+place when ``unit`` is dereferenced. The code must check that ``val`` is 0 or
+1 and handle the case where it is invalid.
+
+Unexpected Device Accesses
+--------------------------
+The guest may access device registers in unusual orders or at unexpected
+moments. Device emulation code must not assume that the guest follows the
+typical "theory of operation" presented in driver writer manuals. The guest
+may make nonsense accesses to device registers such as starting operations
+before the device has been fully initialized.
+
+A related issue is that device emulation code must be prepared for unexpected
+device register accesses while asynchronous operations are in progress. A
+well-behaved guest might wait for a completion interrupt before accessing
+certain device registers. Device emulation code must handle the case where the
+guest overwrites registers or submits further requests before an ongoing
+request completes. Unexpected accesses must not cause memory corruption or
+leaks in QEMU.
+
+Invalid device register accesses can be reported with
+``qemu_log_mask(LOG_GUEST_ERROR, ...)``. The ``-d guest_errors`` command-line
+option enables these log messages.
+
+Live Migration
+--------------
+Device state can be saved to disk image files and shared with other users.
+Live migration code must validate inputs when loading device state so an
+attacker cannot gain control by crafting invalid device states. Device state
+is therefore considered untrusted even though it is typically generated by QEMU
+itself.
+
+Guest Memory Access Races
+-------------------------
+Guests with multiple vCPUs may modify guest RAM while device emulation code is
+running. Device emulation code must copy in descriptors and other guest RAM
+structures and only process the local copy. This prevents
+time-of-check-to-time-of-use (TOCTOU) race conditions that could cause QEMU to
+crash when a vCPU thread modifies guest RAM while device emulation is
+processing it.
--
2.21.0
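To make the TOCTOU guidance in the document above concrete, here is a sketch of the copy-then-validate pattern. cpu_physical_memory_read() and qemu_log_mask() are real QEMU APIs, but the descriptor layout, MAX_XFER_LEN and do_transfer() are invented for illustration:

    #include "qemu/osdep.h"
    #include "qemu/log.h"
    #include "exec/cpu-common.h"

    #define MAX_XFER_LEN 4096            /* invented limit */

    typedef struct {
        uint64_t addr;
        uint32_t len;
    } Descriptor;

    static void do_transfer(uint64_t addr, uint32_t len);  /* illustrative */

    static void process_descriptor(hwaddr desc_gpa)
    {
        Descriptor desc;

        /* Snapshot the descriptor out of guest RAM first... */
        cpu_physical_memory_read(desc_gpa, &desc, sizeof(desc));

        /* ...then validate and use only the local copy. Re-reading guest
         * RAM after validation would reintroduce the race, because another
         * vCPU could rewrite the descriptor between check and use. */
        if (desc.len > MAX_XFER_LEN) {
            qemu_log_mask(LOG_GUEST_ERROR, "descriptor length too large\n");
            return;
        }
        do_transfer(desc.addr, desc.len);
    }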
This new chapter in the QEMU documentation covers the security
requirements that QEMU is designed to meet and principles for securely
deploying QEMU.

It is just a starting point that can be extended in the future with more
information.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Li Qiang <liq3ea@gmail.com>
Message-id: 20190509121820.16294-3-stefanha@redhat.com
Message-Id: <20190509121820.16294-3-stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
17
---
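Note (not part of the patch): dropping cancellation support is safe because a BlockAIOCB whose AIOCBInfo carries no .cancel_async callback is simply left to run to completion when a cancel is requested. A self-contained model of that dispatch, with stand-in types, paraphrased rather than copied from the block layer:

#include <stddef.h>

typedef struct BlockAIOCBModel BlockAIOCBModel;

typedef struct {
    size_t aiocb_size;
    void (*cancel_async)(BlockAIOCBModel *acb);   /* may be NULL */
} AIOCBInfoModel;

struct BlockAIOCBModel {
    const AIOCBInfoModel *aiocb_info;
};

static void aio_cancel_async_model(BlockAIOCBModel *acb)
{
    if (acb->aiocb_info->cancel_async) {
        acb->aiocb_info->cancel_async(acb);
    }
    /* NULL callback: nothing to do, the request completes normally */
}

int main(void)
{
    static const AIOCBInfoModel info = { sizeof(BlockAIOCBModel), NULL };
    BlockAIOCBModel acb = { &info };

    aio_cancel_async_model(&acb);    /* no-op: no cancel callback registered */
    return 0;
}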
 block/sheepdog.c | 52 ----------------------------------------------------
 1 file changed, 52 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index XXXXXXX..XXXXXXX 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -XXX,XX +XXX,XX @@ struct SheepdogAIOCB {
     Coroutine *coroutine;
     void (*aio_done_func)(SheepdogAIOCB *);
 
-    bool cancelable;
     int nr_pending;
 
     uint32_t min_affect_data_idx;
@@ -XXX,XX +XXX,XX @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
 {
     SheepdogAIOCB *acb = aio_req->aiocb;
 
-    acb->cancelable = false;
     QLIST_REMOVE(aio_req, aio_siblings);
     g_free(aio_req);
 
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
     qemu_aio_unref(acb);
 }
 
-/*
- * Check whether the specified acb can be canceled
- *
- * We can cancel aio when any request belonging to the acb is:
- * - Not processed by the sheepdog server.
- * - Not linked to the inflight queue.
- */
-static bool sd_acb_cancelable(const SheepdogAIOCB *acb)
-{
-    BDRVSheepdogState *s = acb->common.bs->opaque;
-    AIOReq *aioreq;
-
-    if (!acb->cancelable) {
-        return false;
-    }
-
-    QLIST_FOREACH(aioreq, &s->inflight_aio_head, aio_siblings) {
-        if (aioreq->aiocb == acb) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-static void sd_aio_cancel(BlockAIOCB *blockacb)
-{
-    SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
-    BDRVSheepdogState *s = acb->common.bs->opaque;
-    AIOReq *aioreq, *next;
-
-    if (sd_acb_cancelable(acb)) {
-        /* Remove outstanding requests from failed queue. */
-        QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
-                           next) {
-            if (aioreq->aiocb == acb) {
-                free_aio_req(s, aioreq);
-            }
-        }
-
-        assert(acb->nr_pending == 0);
-        if (acb->common.cb) {
-            acb->common.cb(acb->common.opaque, -ECANCELED);
-        }
-        sd_finish_aiocb(acb);
-    }
-}
-
 static const AIOCBInfo sd_aiocb_info = {
     .aiocb_size = sizeof(SheepdogAIOCB),
-    .cancel_async = sd_aio_cancel,
 };
 
 static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
@@ -XXX,XX +XXX,XX @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
     acb->nb_sectors = nb_sectors;
 
     acb->aio_done_func = NULL;
-    acb->cancelable = true;
     acb->coroutine = qemu_coroutine_self();
     acb->ret = 0;
     acb->nr_pending = 0;
-- 
2.9.3

This new chapter in the QEMU documentation covers the security
requirements that QEMU is designed to meet and principles for securely
deploying QEMU.

It is just a starting point that can be extended in the future with more
information.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Li Qiang <liq3ea@gmail.com>
Message-id: 20190509121820.16294-3-stefanha@redhat.com
Message-Id: <20190509121820.16294-3-stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 Makefile           |   2 +-
 docs/security.texi | 131 +++++++++++++++++++++++++++++++++++++++++++++
 qemu-doc.texi      |   3 ++
 3 files changed, 135 insertions(+), 1 deletion(-)
 create mode 100644 docs/security.texi

diff --git a/Makefile b/Makefile
index XXXXXXX..XXXXXXX 100644
--- a/Makefile
+++ b/Makefile
@@ -XXX,XX +XXX,XX @@ qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \
 	qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \
 	qemu-deprecated.texi qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \
 	qemu-monitor-info.texi docs/qemu-block-drivers.texi \
-	docs/qemu-cpu-models.texi
+	docs/qemu-cpu-models.texi docs/security.texi
 
 docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \
 docs/interop/qemu-ga-ref.info docs/interop/qemu-ga-ref.pdf \
diff --git a/docs/security.texi b/docs/security.texi
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/docs/security.texi
@@ -XXX,XX +XXX,XX @@
+@node Security
+@chapter Security
+
+@section Overview
+
+This chapter explains the security requirements that QEMU is designed to meet
+and principles for securely deploying QEMU.
+
+@section Security Requirements
+
+QEMU supports many different use cases, some of which have stricter security
+requirements than others. The community has agreed on the overall security
+requirements that users may depend on. These requirements define what is
+considered supported from a security perspective.
+
+@subsection Virtualization Use Case
+
+The virtualization use case covers cloud and virtual private server (VPS)
+hosting, as well as traditional data center and desktop virtualization. These
+use cases rely on hardware virtualization extensions to execute guest code
+safely on the physical CPU at close-to-native speed.
+
+The following entities are untrusted, meaning that they may be buggy or
+malicious:
+
+@itemize
+@item Guest
+@item User-facing interfaces (e.g. VNC, SPICE, WebSocket)
+@item Network protocols (e.g. NBD, live migration)
+@item User-supplied files (e.g. disk images, kernels, device trees)
+@item Passthrough devices (e.g. PCI, USB)
+@end itemize
+
+Bugs affecting these entities are evaluated on whether they can cause damage in
+real-world use cases and treated as security bugs if this is the case.
+
+@subsection Non-virtualization Use Case
+
+The non-virtualization use case covers emulation using the Tiny Code Generator
+(TCG). In principle the TCG and device emulation code used in conjunction with
+the non-virtualization use case should meet the same security requirements as
+the virtualization use case. However, for historical reasons much of the
+non-virtualization use case code was not written with these security
+requirements in mind.
+
+Bugs affecting the non-virtualization use case are not considered security
+bugs at this time. Users with non-virtualization use cases must not rely on
+QEMU to provide guest isolation or any security guarantees.
+
+@section Architecture
+
+This section describes the design principles that ensure the security
+requirements are met.
+
+@subsection Guest Isolation
+
+Guest isolation is the confinement of guest code to the virtual machine. When
+guest code gains control of execution on the host this is called escaping the
+virtual machine. Isolation also includes resource limits such as throttling of
+CPU, memory, disk, or network. Guests must be unable to exceed their resource
+limits.
+
+QEMU presents an attack surface to the guest in the form of emulated devices.
+The guest must not be able to gain control of QEMU. Bugs in emulated devices
+could allow malicious guests to gain code execution in QEMU. At this point the
+guest has escaped the virtual machine and is able to act in the context of the
+QEMU process on the host.
+
+Guests often interact with other guests and share resources with them. A
+malicious guest must not gain control of other guests or access their data.
+Disk image files and network traffic must be protected from other guests unless
+explicitly shared between them by the user.
+
+@subsection Principle of Least Privilege
+
+The principle of least privilege states that each component only has access to
+the privileges necessary for its function. In the case of QEMU this means that
+each process only has access to resources belonging to the guest.
+
+The QEMU process should not have access to any resources that are inaccessible
+to the guest. This way the guest does not gain anything by escaping into the
+QEMU process since it already has access to those same resources from within
+the guest.
+
+Following the principle of least privilege immediately fulfills guest isolation
+requirements. For example, guest A only has access to its own disk image file
+@code{a.img} and not guest B's disk image file @code{b.img}.
+
+In reality certain resources are inaccessible to the guest but must be
+available to QEMU to perform its function. For example, host system calls are
+necessary for QEMU but are not exposed to guests. A guest that escapes into
+the QEMU process can then begin invoking host system calls.
+
+New features must be designed to follow the principle of least privilege.
+Should this not be possible for technical reasons, the security risk must be
+clearly documented so users are aware of the trade-off of enabling the feature.
+
+@subsection Isolation mechanisms
+
+Several isolation mechanisms are available to realize this architecture of
+guest isolation and the principle of least privilege. With the exception of
+Linux seccomp, these mechanisms are all deployed by management tools that
+launch QEMU, such as libvirt. They are also platform-specific so they are only
+described briefly for Linux here.
+
+The fundamental isolation mechanism is that QEMU processes must run as
+unprivileged users. Sometimes it seems more convenient to launch QEMU as
+root to give it access to host devices (e.g. @code{/dev/net/tun}) but this poses a
+huge security risk. File descriptor passing can be used to give an otherwise
+unprivileged QEMU process access to host devices without running QEMU as root.
+It is also possible to launch QEMU as a non-root user and configure UNIX groups
+for access to @code{/dev/kvm}, @code{/dev/net/tun}, and other device nodes.
+Some Linux distros already ship with UNIX groups for these devices by default.
+
+@itemize
+@item SELinux and AppArmor make it possible to confine processes beyond the
+traditional UNIX process and file permissions model. They restrict the QEMU
+process from accessing processes and files on the host system that are not
+needed by QEMU.
+
+@item Resource limits and cgroup controllers provide throughput and utilization
+limits on key resources such as CPU time, memory, and I/O bandwidth.
+
+@item Linux namespaces can be used to make process, file system, and other system
+resources unavailable to QEMU. A namespaced QEMU process is restricted to only
+those resources that were granted to it.
+
+@item Linux seccomp is available via the QEMU @option{--sandbox} option. It disables
+system calls that are not needed by QEMU, thereby reducing the host kernel
+attack surface.
+@end itemize
diff --git a/qemu-doc.texi b/qemu-doc.texi
index XXXXXXX..XXXXXXX 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -XXX,XX +XXX,XX @@
 * QEMU Guest Agent::
 * QEMU User space emulator::
 * System requirements::
+* Security::
 * Implementation notes::
 * Deprecated features::
 * Supported build platforms::
@@ -XXX,XX +XXX,XX @@ added with Linux 4.5 which is supported by the major distros. And even
 if RHEL7 has kernel 3.10, KVM there has the required functionality there
 to make it close to a 4.5 or newer kernel.
 
+@include docs/security.texi
+
 @include qemu-tech.texi
 
 @include qemu-deprecated.texi
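Note (not part of the patch): the "Isolation mechanisms" section above names file descriptor passing as the alternative to running QEMU as root. A minimal, self-contained C sketch of a privileged helper that opens a host device and hands the descriptor to an unprivileged peer over a UNIX socket via SCM_RIGHTS; the socket path and device are illustrative assumptions, not something this patch defines:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>

/* Hand one open fd to a peer over a connected UNIX socket. */
static int send_fd(int sock, int fd)
{
    char dummy = 0;
    struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
    union {
        struct cmsghdr hdr;
        char buf[CMSG_SPACE(sizeof(int))];
    } u;
    struct msghdr msg = {
        .msg_iov = &iov,
        .msg_iovlen = 1,
        .msg_control = u.buf,
        .msg_controllen = sizeof(u.buf),
    };
    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_RIGHTS;             /* kernel duplicates the fd */
    cmsg->cmsg_len = CMSG_LEN(sizeof(int));
    memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

    return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
}

int main(void)
{
    /* Both the socket path and the device node are made-up examples. */
    struct sockaddr_un addr = { .sun_family = AF_UNIX };
    int sock = socket(AF_UNIX, SOCK_STREAM, 0);
    int tunfd = open("/dev/net/tun", O_RDWR);  /* needs privilege */

    strncpy(addr.sun_path, "/run/fd-helper.sock", sizeof(addr.sun_path) - 1);
    if (sock < 0 || tunfd < 0 ||
        connect(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
        send_fd(sock, tunfd) < 0) {
        perror("fd-helper");
        return 1;
    }
    return 0;
}

The receiving process would call recvmsg() with a matching control buffer to collect the descriptor, then use it exactly as if it had opened the device itself.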
-- 
2.21.0


diff view generated by jsdifflib