block/block-backend.c | 51 +++++++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 26 deletions(-)
Starting qemu with the following arguments causes qemu to segfault:
... -device lsi,id=lsi0
-drive file=iscsi:<...>,format=raw,if=none,node-name=iscsi1
-device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1
This patch fixes blk_aio_ioctl() so it does not pass stack addresses to
blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More
details about the bug follow.
blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the
coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter().
When blk_aio_ioctl() is executed from within a coroutine context (e.g.
iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to
the current coroutine's wakeup queue. blk_aio_ioctl() then returns.
When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer:
....
BlkRwCo *rwco = &acb->rwco;
rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
rwco->qiov->iov[0].iov_base); <--- qiov is invalid here
...
In the case when blk_aio_ioctl() is called from a non-coroutine context,
blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls
qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine
execution is complete, control returns to blk_aio_ioctl_entry() after the call
to blk_co_ioctl(). Nothing dereferences the stale qiov after this point, but
the request is still holding a pointer into blk_aio_ioctl()'s dead stack frame.
The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer
rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the
coroutine function casts it to QEMUIOVector or uses the void pointer directly.
Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Mark Kanda <mark.kanda@oracle.com>
---
block/block-backend.c | 51 +++++++++++++++++++++++++--------------------------
1 file changed, 25 insertions(+), 26 deletions(-)
diff --git a/block/block-backend.c b/block/block-backend.c
index baef8e7..2d0d9b6 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1140,7 +1140,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
typedef struct BlkRwCo {
BlockBackend *blk;
int64_t offset;
- QEMUIOVector *qiov;
+ void *iobuf;
int ret;
BdrvRequestFlags flags;
} BlkRwCo;
@@ -1148,17 +1148,19 @@ typedef struct BlkRwCo {
static void blk_read_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
+ QEMUIOVector *qiov = rwco->iobuf;
- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
- rwco->qiov, rwco->flags);
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
+ qiov, rwco->flags);
}
static void blk_write_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
+ QEMUIOVector *qiov = rwco->iobuf;
- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
- rwco->qiov, rwco->flags);
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
+ qiov, rwco->flags);
}
static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
@@ -1178,7 +1180,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
rwco = (BlkRwCo) {
.blk = blk,
.offset = offset,
- .qiov = &qiov,
+ .iobuf = &qiov,
.flags = flags,
.ret = NOT_DONE,
};
@@ -1275,7 +1277,7 @@ static void blk_aio_complete_bh(void *opaque)
}
static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
- QEMUIOVector *qiov, CoroutineEntry co_entry,
+ void *iobuf, CoroutineEntry co_entry,
BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
@@ -1287,7 +1289,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
acb->rwco = (BlkRwCo) {
.blk = blk,
.offset = offset,
- .qiov = qiov,
+ .iobuf = iobuf,
.flags = flags,
.ret = NOT_DONE,
};
@@ -1310,10 +1312,11 @@ static void blk_aio_read_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
+ QEMUIOVector *qiov = rwco->iobuf;
- assert(rwco->qiov->size == acb->bytes);
+ assert(qiov->size == acb->bytes);
rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
- rwco->qiov, rwco->flags);
+ qiov, rwco->flags);
blk_aio_complete(acb);
}
@@ -1321,10 +1324,11 @@ static void blk_aio_write_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
+ QEMUIOVector *qiov = rwco->iobuf;
- assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
+ assert(!qiov || qiov->size == acb->bytes);
rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
- rwco->qiov, rwco->flags);
+ qiov, rwco->flags);
blk_aio_complete(acb);
}
@@ -1453,8 +1457,10 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
static void blk_ioctl_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
+ QEMUIOVector *qiov = rwco->iobuf;
+
rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
- rwco->qiov->iov[0].iov_base);
+ qiov->iov[0].iov_base);
}
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
@@ -1467,24 +1473,15 @@ static void blk_aio_ioctl_entry(void *opaque)
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
- rwco->qiov->iov[0].iov_base);
+ rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
+
blk_aio_complete(acb);
}
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
- QEMUIOVector qiov;
- struct iovec iov;
-
- iov = (struct iovec) {
- .iov_base = buf,
- .iov_len = 0,
- };
- qemu_iovec_init_external(&qiov, &iov, 1);
-
- return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
+ return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
}
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
@@ -1900,7 +1897,9 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
static void blk_pdiscard_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
+ QEMUIOVector *qiov = rwco->iobuf;
+
+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
}
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
--
2.7.4
blk_aio_prwv() now takes a void pointer and the coroutine functions have been modified to cast it into QEMUIOVector if needed. It does not use an union in BlkRwCo since this leads to code - blk_aio_prwv() would have to write to the void pointer member, but coroutines would sometimes read the QEMUIOVector member. Paolo also suggested not using a union. Note that a similar issue exists in blk_ioctl()/blk_ioctl_entry()/blk_prw() where blk_prw() always creates the QEMUIOVector even if blk_ioctl()/blk_ioctl_entry() does not need a QEMUIOVector. This will need to be fixed separately to keep it consistent with the AIO path. > On Nov 23, 2017, at 8:55 AM, Deepa Srinivasan <deepa.srinivasan@oracle.com> wrote: > > Starting qemu with the following arguments causes qemu to segfault: > ... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name= > iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1 > > This patch fixes blk_aio_ioctl() so it does not pass stack addresses to > blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More > details about the bug follow. > > blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the > coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter(). > > When blk_aio_ioctl() is executed from within a coroutine context (e.g. > iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to > the current coroutine's wakeup queue. blk_aio_ioctl() then returns. > > When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer: > .... > BlkRwCo *rwco = &acb->rwco; > > rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > rwco->qiov->iov[0].iov_base); <--- qiov is > invalid here > ... > > In the case when blk_aio_ioctl() is called from a non-coroutine context, > blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls > qemu_coroutine_yield(), blk_aio_ioctl() will return. 
When the coroutine > execution is complete, control returns to blk_aio_ioctl_entry() after the call > to blk_co_ioctl(). There is no invalid reference after this point, but the > function is still holding on to invalid pointers. > > The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer > rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the > coroutine function casts it to QEMUIOVector or uses the void pointer directly. > > Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com> > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Reviewed-by: Mark Kanda <mark.kanda@oracle.com> > --- > block/block-backend.c | 51 +++++++++++++++++++++++++-------------------------- > 1 file changed, 25 insertions(+), 26 deletions(-) > > diff --git a/block/block-backend.c b/block/block-backend.c > index baef8e7..2d0d9b6 100644 > --- a/block/block-backend.c > +++ b/block/block-backend.c > @@ -1140,7 +1140,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, > typedef struct BlkRwCo { > BlockBackend *blk; > int64_t offset; > - QEMUIOVector *qiov; > + void *iobuf; > int ret; > BdrvRequestFlags flags; > } BlkRwCo; > @@ -1148,17 +1148,19 @@ typedef struct BlkRwCo { > static void blk_read_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > + QEMUIOVector *qiov = rwco->iobuf; > > - rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size, > - rwco->qiov, rwco->flags); > + rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, > + qiov, rwco->flags); > } > > static void blk_write_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > + QEMUIOVector *qiov = rwco->iobuf; > > - rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size, > - rwco->qiov, rwco->flags); > + rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, > + qiov, rwco->flags); > } > > static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, > @@ -1178,7 +1180,7 @@ static int 
blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, > rwco = (BlkRwCo) { > .blk = blk, > .offset = offset, > - .qiov = &qiov, > + .iobuf = &qiov, > .flags = flags, > .ret = NOT_DONE, > }; > @@ -1275,7 +1277,7 @@ static void blk_aio_complete_bh(void *opaque) > } > > static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, > - QEMUIOVector *qiov, CoroutineEntry co_entry, > + void *iobuf, CoroutineEntry co_entry, > BdrvRequestFlags flags, > BlockCompletionFunc *cb, void *opaque) > { > @@ -1287,7 +1289,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, > acb->rwco = (BlkRwCo) { > .blk = blk, > .offset = offset, > - .qiov = qiov, > + .iobuf = iobuf, > .flags = flags, > .ret = NOT_DONE, > }; > @@ -1310,10 +1312,11 @@ static void blk_aio_read_entry(void *opaque) > { > BlkAioEmAIOCB *acb = opaque; > BlkRwCo *rwco = &acb->rwco; > + QEMUIOVector *qiov = rwco->iobuf; > > - assert(rwco->qiov->size == acb->bytes); > + assert(qiov->size == acb->bytes); > rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, > - rwco->qiov, rwco->flags); > + qiov, rwco->flags); > blk_aio_complete(acb); > } > > @@ -1321,10 +1324,11 @@ static void blk_aio_write_entry(void *opaque) > { > BlkAioEmAIOCB *acb = opaque; > BlkRwCo *rwco = &acb->rwco; > + QEMUIOVector *qiov = rwco->iobuf; > > - assert(!rwco->qiov || rwco->qiov->size == acb->bytes); > + assert(!qiov || qiov->size == acb->bytes); > rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, > - rwco->qiov, rwco->flags); > + qiov, rwco->flags); > blk_aio_complete(acb); > } > > @@ -1453,8 +1457,10 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) > static void blk_ioctl_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > + QEMUIOVector *qiov = rwco->iobuf; > + > rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > - rwco->qiov->iov[0].iov_base); > + qiov->iov[0].iov_base); > } > > int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) > @@ 
-1467,24 +1473,15 @@ static void blk_aio_ioctl_entry(void *opaque) > BlkAioEmAIOCB *acb = opaque; > BlkRwCo *rwco = &acb->rwco; > > - rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > - rwco->qiov->iov[0].iov_base); > + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); > + > blk_aio_complete(acb); > } > > BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, > BlockCompletionFunc *cb, void *opaque) > { > - QEMUIOVector qiov; > - struct iovec iov; > - > - iov = (struct iovec) { > - .iov_base = buf, > - .iov_len = 0, > - }; > - qemu_iovec_init_external(&qiov, &iov, 1); > - > - return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque); > + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); > } > > int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) > @@ -1900,7 +1897,9 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc, > static void blk_pdiscard_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size); > + QEMUIOVector *qiov = rwco->iobuf; > + > + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); > } > > int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) > -- > 2.7.4 > >
On 23/11/2017 18:05, Deepa Srinivasan wrote: > blk_aio_prwv() now takes a void pointer and the coroutine functions > have been modified to cast it into QEMUIOVector if needed. It does > not use an union in BlkRwCo since this leads to code - blk_aio_prwv() > would have to write to the void pointer member, but coroutines would > sometimes read the QEMUIOVector member. Paolo also suggested not > using a union. > > Note that a similar issue exists in > blk_ioctl()/blk_ioctl_entry()/blk_prw() where blk_prw() always > creates the QEMUIOVector even if blk_ioctl()/blk_ioctl_entry() does > not need a QEMUIOVector. This will need to be fixed separately to > keep it consistent with the AIO path. For that it's probably simplest to inline blk_prw into blk_ioctl and remove all the cruft: diff --git a/block/block-backend.c b/block/block-backend.c index 45d9101be3..ceab3166bc 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1404,12 +1404,28 @@ static void blk_ioctl_entry(void *opaque) { BlkRwCo *rwco = opaque; rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, - rwco->qiov->iov[0].iov_base); + rwco->iobuf); } int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { - return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0); + BlkRwCo rwco = (BlkRwCo) { + .blk = blk, + .iobuf = buf, + .offset = req, + .ret = NOT_DONE, + }; + + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + blk_ioctl_entry(&rwco); + } else { + Coroutine *co = qemu_coroutine_create(blk_ioctl_entry, &rwco); + bdrv_coroutine_enter(blk_bs(blk), co); + BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); + } + + return rwco.ret; } static void blk_aio_ioctl_entry(void *opaque) Thanks, Paolo
Am 23.11.2017 um 18:05 hat Deepa Srinivasan geschrieben: > blk_aio_prwv() now takes a void pointer and the coroutine functions > have been modified to cast it into QEMUIOVector if needed. It does not > use an union in BlkRwCo since this leads to code - blk_aio_prwv() > would have to write to the void pointer member, but coroutines would > sometimes read the QEMUIOVector member. Paolo also suggested not using > a union. I don't particularly like void pointers, but I guess it's fair enough. > Note that a similar issue exists in > blk_ioctl()/blk_ioctl_entry()/blk_prw() where blk_prw() always creates > the QEMUIOVector even if blk_ioctl()/blk_ioctl_entry() does not need a > QEMUIOVector. This will need to be fixed separately to keep it > consistent with the AIO path. I don't think there is an actual problem in the blk_ioctl() path because the iov on the stack stays valid as long as the coroutine runs. AIO is different because it returns before the coroutine has terminated. Kevin
On 23/11/2017 18:29, Kevin Wolf wrote: >> Note that a similar issue exists in >> blk_ioctl()/blk_ioctl_entry()/blk_prw() where blk_prw() always creates >> the QEMUIOVector even if blk_ioctl()/blk_ioctl_entry() does not need a >> QEMUIOVector. This will need to be fixed separately to keep it >> consistent with the AIO path. > > I don't think there is an actual problem in the blk_ioctl() path because > the iov on the stack stays valid as long as the coroutine runs. AIO is > different because it returns before the coroutine has terminated. I agree, it's just code that is slightly ugly. Paolo
Am 23.11.2017 um 18:31 hat Paolo Bonzini geschrieben: > On 23/11/2017 18:29, Kevin Wolf wrote: > >> Note that a similar issue exists in > >> blk_ioctl()/blk_ioctl_entry()/blk_prw() where blk_prw() always creates > >> the QEMUIOVector even if blk_ioctl()/blk_ioctl_entry() does not need a > >> QEMUIOVector. This will need to be fixed separately to keep it > >> consistent with the AIO path. > > > > I don't think there is an actual problem in the blk_ioctl() path because > > the iov on the stack stays valid as long as the coroutine runs. AIO is > > different because it returns before the coroutine has terminated. > > I agree, it's just code that is slightly ugly. Slightly. Neither void pointers nor code duplication make it less ugly, though. So in this case, I'd say: If it ain't broke, don't fix it. Kevin
> On Nov 23, 2017, at 9:29 AM, Kevin Wolf <kwolf@redhat.com> wrote: > > Am 23.11.2017 um 18:05 hat Deepa Srinivasan geschrieben: >> blk_aio_prwv() now takes a void pointer and the coroutine functions >> have been modified to cast it into QEMUIOVector if needed. It does not >> use an union in BlkRwCo since this leads to code - blk_aio_prwv() >> would have to write to the void pointer member, but coroutines would >> sometimes read the QEMUIOVector member. Paolo also suggested not using >> a union. > > I don't particularly like void pointers, but I guess it's fair enough. Agreed, but if a union were to hold QEMUIOVector* and void* in BlkRwCo, blk_aio_prwv() would always write to void* but some coroutine functions would read from the QEMUIOVector* member. Keeping it as a void pointer is a safer option. > >> Note that a similar issue exists in >> blk_ioctl()/blk_ioctl_entry()/blk_prw() where blk_prw() always creates >> the QEMUIOVector even if blk_ioctl()/blk_ioctl_entry() does not need a >> QEMUIOVector. This will need to be fixed separately to keep it >> consistent with the AIO path. > > I don't think there is an actual problem in the blk_ioctl() path because > the iov on the stack stays valid as long as the coroutine runs. AIO is > different because it returns before the coroutine has terminated. > The problem in blk_ioctl() is not a crash, because blk_prw() waits for the coroutine completion, as you say. The issue is that it unnecessarily creates a QEMUIOVector for the ioctl case. I was saying, if this is to be kept consistent with the AIO patch, then it could be done in a separate patch. > Kevin >
Kevin, Paolo, Stefan, Are there any further comments on this patch? Can this patch be committed? Thanks Deepa > On Nov 23, 2017, at 8:55 AM, Deepa Srinivasan <deepa.srinivasan@oracle.com> wrote: > > Starting qemu with the following arguments causes qemu to segfault: > ... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name= > iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1 > > This patch fixes blk_aio_ioctl() so it does not pass stack addresses to > blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More > details about the bug follow. > > blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the > coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter(). > > When blk_aio_ioctl() is executed from within a coroutine context (e.g. > iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to > the current coroutine's wakeup queue. blk_aio_ioctl() then returns. > > When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer: > .... > BlkRwCo *rwco = &acb->rwco; > > rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > rwco->qiov->iov[0].iov_base); <--- qiov is > invalid here > ... > > In the case when blk_aio_ioctl() is called from a non-coroutine context, > blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls > qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine > execution is complete, control returns to blk_aio_ioctl_entry() after the call > to blk_co_ioctl(). There is no invalid reference after this point, but the > function is still holding on to invalid pointers. > > The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer > rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the > coroutine function casts it to QEMUIOVector or uses the void pointer directly. 
> > Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com> > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Reviewed-by: Mark Kanda <mark.kanda@oracle.com> > --- > block/block-backend.c | 51 +++++++++++++++++++++++++-------------------------- > 1 file changed, 25 insertions(+), 26 deletions(-) > > diff --git a/block/block-backend.c b/block/block-backend.c > index baef8e7..2d0d9b6 100644 > --- a/block/block-backend.c > +++ b/block/block-backend.c > @@ -1140,7 +1140,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, > typedef struct BlkRwCo { > BlockBackend *blk; > int64_t offset; > - QEMUIOVector *qiov; > + void *iobuf; > int ret; > BdrvRequestFlags flags; > } BlkRwCo; > @@ -1148,17 +1148,19 @@ typedef struct BlkRwCo { > static void blk_read_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > + QEMUIOVector *qiov = rwco->iobuf; > > - rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size, > - rwco->qiov, rwco->flags); > + rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, > + qiov, rwco->flags); > } > > static void blk_write_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > + QEMUIOVector *qiov = rwco->iobuf; > > - rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size, > - rwco->qiov, rwco->flags); > + rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, > + qiov, rwco->flags); > } > > static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, > @@ -1178,7 +1180,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, > rwco = (BlkRwCo) { > .blk = blk, > .offset = offset, > - .qiov = &qiov, > + .iobuf = &qiov, > .flags = flags, > .ret = NOT_DONE, > }; > @@ -1275,7 +1277,7 @@ static void blk_aio_complete_bh(void *opaque) > } > > static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, > - QEMUIOVector *qiov, CoroutineEntry co_entry, > + void *iobuf, CoroutineEntry co_entry, > BdrvRequestFlags flags, > BlockCompletionFunc *cb, 
void *opaque) > { > @@ -1287,7 +1289,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, > acb->rwco = (BlkRwCo) { > .blk = blk, > .offset = offset, > - .qiov = qiov, > + .iobuf = iobuf, > .flags = flags, > .ret = NOT_DONE, > }; > @@ -1310,10 +1312,11 @@ static void blk_aio_read_entry(void *opaque) > { > BlkAioEmAIOCB *acb = opaque; > BlkRwCo *rwco = &acb->rwco; > + QEMUIOVector *qiov = rwco->iobuf; > > - assert(rwco->qiov->size == acb->bytes); > + assert(qiov->size == acb->bytes); > rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, > - rwco->qiov, rwco->flags); > + qiov, rwco->flags); > blk_aio_complete(acb); > } > > @@ -1321,10 +1324,11 @@ static void blk_aio_write_entry(void *opaque) > { > BlkAioEmAIOCB *acb = opaque; > BlkRwCo *rwco = &acb->rwco; > + QEMUIOVector *qiov = rwco->iobuf; > > - assert(!rwco->qiov || rwco->qiov->size == acb->bytes); > + assert(!qiov || qiov->size == acb->bytes); > rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, > - rwco->qiov, rwco->flags); > + qiov, rwco->flags); > blk_aio_complete(acb); > } > > @@ -1453,8 +1457,10 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) > static void blk_ioctl_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > + QEMUIOVector *qiov = rwco->iobuf; > + > rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > - rwco->qiov->iov[0].iov_base); > + qiov->iov[0].iov_base); > } > > int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) > @@ -1467,24 +1473,15 @@ static void blk_aio_ioctl_entry(void *opaque) > BlkAioEmAIOCB *acb = opaque; > BlkRwCo *rwco = &acb->rwco; > > - rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > - rwco->qiov->iov[0].iov_base); > + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); > + > blk_aio_complete(acb); > } > > BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, > BlockCompletionFunc *cb, void *opaque) > { > - QEMUIOVector qiov; > - struct iovec iov; 
> - > - iov = (struct iovec) { > - .iov_base = buf, > - .iov_len = 0, > - }; > - qemu_iovec_init_external(&qiov, &iov, 1); > - > - return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque); > + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); > } > > int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) > @@ -1900,7 +1897,9 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc, > static void blk_pdiscard_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size); > + QEMUIOVector *qiov = rwco->iobuf; > + > + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); > } > > int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) > -- > 2.7.4 > >
© 2016 - 2024 Red Hat, Inc.