[Qemu-devel] [PATCH] block: Fix qemu crash when using scsi-block

Deepa Srinivasan posted 1 patch 6 years, 4 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/1511456107-7081-1-git-send-email-deepa.srinivasan@oracle.com
Test checkpatch passed
Test docker passed
Test ppc passed
Test s390x passed
There is a newer version of this series
block/block-backend.c | 51 +++++++++++++++++++++++++--------------------------
1 file changed, 25 insertions(+), 26 deletions(-)
[Qemu-devel] [PATCH] block: Fix qemu crash when using scsi-block
Posted by Deepa Srinivasan 6 years, 4 months ago
Starting qemu with the following arguments causes qemu to segfault:
... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name=
iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1

This patch fixes blk_aio_ioctl() so it does not pass stack addresses to
blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More
details about the bug follow.

blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the
coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter().

When blk_aio_ioctl() is executed from within a coroutine context (e.g.
iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to
the current coroutine's wakeup queue. blk_aio_ioctl() then returns.

When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer:
....
    BlkRwCo *rwco = &acb->rwco;

    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
                             rwco->qiov->iov[0].iov_base);  <--- qiov is
                                                                 invalid here
...

In the case when blk_aio_ioctl() is called from a non-coroutine context,
blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls
qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine
execution is complete, control returns to blk_aio_ioctl_entry() after the call
to blk_co_ioctl(). There is no invalid reference after this point, but the
function is still holding on to invalid pointers.

The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer
rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the
coroutine function casts it to QEMUIOVector or uses the void pointer directly.

Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Mark Kanda <mark.kanda@oracle.com>
---
 block/block-backend.c | 51 +++++++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index baef8e7..2d0d9b6 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1140,7 +1140,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
 typedef struct BlkRwCo {
     BlockBackend *blk;
     int64_t offset;
-    QEMUIOVector *qiov;
+    void *iobuf;
     int ret;
     BdrvRequestFlags flags;
 } BlkRwCo;
@@ -1148,17 +1148,19 @@ typedef struct BlkRwCo {
 static void blk_read_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
-                              rwco->qiov, rwco->flags);
+    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
+                              qiov, rwco->flags);
 }
 
 static void blk_write_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
-                               rwco->qiov, rwco->flags);
+    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
+                               qiov, rwco->flags);
 }
 
 static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
@@ -1178,7 +1180,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
     rwco = (BlkRwCo) {
         .blk    = blk,
         .offset = offset,
-        .qiov   = &qiov,
+        .iobuf  = &qiov,
         .flags  = flags,
         .ret    = NOT_DONE,
     };
@@ -1275,7 +1277,7 @@ static void blk_aio_complete_bh(void *opaque)
 }
 
 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
-                                QEMUIOVector *qiov, CoroutineEntry co_entry,
+                                void *iobuf, CoroutineEntry co_entry,
                                 BdrvRequestFlags flags,
                                 BlockCompletionFunc *cb, void *opaque)
 {
@@ -1287,7 +1289,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
     acb->rwco = (BlkRwCo) {
         .blk    = blk,
         .offset = offset,
-        .qiov   = qiov,
+        .iobuf  = iobuf,
         .flags  = flags,
         .ret    = NOT_DONE,
     };
@@ -1310,10 +1312,11 @@ static void blk_aio_read_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    assert(rwco->qiov->size == acb->bytes);
+    assert(qiov->size == acb->bytes);
     rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
-                              rwco->qiov, rwco->flags);
+                              qiov, rwco->flags);
     blk_aio_complete(acb);
 }
 
@@ -1321,10 +1324,11 @@ static void blk_aio_write_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
+    assert(!qiov || qiov->size == acb->bytes);
     rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
-                               rwco->qiov, rwco->flags);
+                               qiov, rwco->flags);
     blk_aio_complete(acb);
 }
 
@@ -1453,8 +1457,10 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 static void blk_ioctl_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
+
     rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
-                             rwco->qiov->iov[0].iov_base);
+                             qiov->iov[0].iov_base);
 }
 
 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
@@ -1467,24 +1473,15 @@ static void blk_aio_ioctl_entry(void *opaque)
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
 
-    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
-                             rwco->qiov->iov[0].iov_base);
+    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
+
     blk_aio_complete(acb);
 }
 
 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                           BlockCompletionFunc *cb, void *opaque)
 {
-    QEMUIOVector qiov;
-    struct iovec iov;
-
-    iov = (struct iovec) {
-        .iov_base = buf,
-        .iov_len = 0,
-    };
-    qemu_iovec_init_external(&qiov, &iov, 1);
-
-    return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
+    return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
 }
 
 int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
@@ -1900,7 +1897,9 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
 static void blk_pdiscard_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
-    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
+    QEMUIOVector *qiov = rwco->iobuf;
+
+    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
 }
 
 int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
-- 
2.7.4


Re: [Qemu-devel] [PATCH] block: Fix qemu crash when using scsi-block
Posted by Deepa Srinivasan 6 years, 4 months ago
blk_aio_prwv() now takes a void pointer and the coroutine functions have been modified to cast it to a QEMUIOVector pointer if needed. It does not use a union in BlkRwCo since this leads to confusing code - blk_aio_prwv() would have to write to the void pointer member, but coroutines would sometimes read the QEMUIOVector member. Paolo also suggested not using a union.

Note that a similar issue exists in blk_ioctl()/blk_ioctl_entry()/blk_prw() where blk_prw() always creates the QEMUIOVector even if blk_ioctl()/blk_ioctl_entry() does not need a QEMUIOVector. This will need to be fixed separately to keep it consistent with the AIO path.

> On Nov 23, 2017, at 8:55 AM, Deepa Srinivasan <deepa.srinivasan@oracle.com> wrote:
> 
> Starting qemu with the following arguments causes qemu to segfault:
> ... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name=
> iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1
> 
> This patch fixes blk_aio_ioctl() so it does not pass stack addresses to
> blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More
> details about the bug follow.
> 
> blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the
> coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter().
> 
> When blk_aio_ioctl() is executed from within a coroutine context (e.g.
> iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to
> the current coroutine's wakeup queue. blk_aio_ioctl() then returns.
> 
> When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer:
> ....
>    BlkRwCo *rwco = &acb->rwco;
> 
>    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
>                             rwco->qiov->iov[0].iov_base);  <--- qiov is
>                                                                 invalid here
> ...
> 
> In the case when blk_aio_ioctl() is called from a non-coroutine context,
> blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls
> qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine
> execution is complete, control returns to blk_aio_ioctl_entry() after the call
> to blk_co_ioctl(). There is no invalid reference after this point, but the
> function is still holding on to invalid pointers.
> 
> The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer
> rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the
> coroutine function casts it to QEMUIOVector or uses the void pointer directly.
> 
> Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com>
> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Reviewed-by: Mark Kanda <mark.kanda@oracle.com>
> ---
> block/block-backend.c | 51 +++++++++++++++++++++++++--------------------------
> 1 file changed, 25 insertions(+), 26 deletions(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index baef8e7..2d0d9b6 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1140,7 +1140,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
> typedef struct BlkRwCo {
>     BlockBackend *blk;
>     int64_t offset;
> -    QEMUIOVector *qiov;
> +    void *iobuf;
>     int ret;
>     BdrvRequestFlags flags;
> } BlkRwCo;
> @@ -1148,17 +1148,19 @@ typedef struct BlkRwCo {
> static void blk_read_entry(void *opaque)
> {
>     BlkRwCo *rwco = opaque;
> +    QEMUIOVector *qiov = rwco->iobuf;
> 
> -    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
> -                              rwco->qiov, rwco->flags);
> +    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
> +                              qiov, rwco->flags);
> }
> 
> static void blk_write_entry(void *opaque)
> {
>     BlkRwCo *rwco = opaque;
> +    QEMUIOVector *qiov = rwco->iobuf;
> 
> -    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
> -                               rwco->qiov, rwco->flags);
> +    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
> +                               qiov, rwco->flags);
> }
> 
> static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
> @@ -1178,7 +1180,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
>     rwco = (BlkRwCo) {
>         .blk    = blk,
>         .offset = offset,
> -        .qiov   = &qiov,
> +        .iobuf  = &qiov,
>         .flags  = flags,
>         .ret    = NOT_DONE,
>     };
> @@ -1275,7 +1277,7 @@ static void blk_aio_complete_bh(void *opaque)
> }
> 
> static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
> -                                QEMUIOVector *qiov, CoroutineEntry co_entry,
> +                                void *iobuf, CoroutineEntry co_entry,
>                                 BdrvRequestFlags flags,
>                                 BlockCompletionFunc *cb, void *opaque)
> {
> @@ -1287,7 +1289,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
>     acb->rwco = (BlkRwCo) {
>         .blk    = blk,
>         .offset = offset,
> -        .qiov   = qiov,
> +        .iobuf  = iobuf,
>         .flags  = flags,
>         .ret    = NOT_DONE,
>     };
> @@ -1310,10 +1312,11 @@ static void blk_aio_read_entry(void *opaque)
> {
>     BlkAioEmAIOCB *acb = opaque;
>     BlkRwCo *rwco = &acb->rwco;
> +    QEMUIOVector *qiov = rwco->iobuf;
> 
> -    assert(rwco->qiov->size == acb->bytes);
> +    assert(qiov->size == acb->bytes);
>     rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
> -                              rwco->qiov, rwco->flags);
> +                              qiov, rwco->flags);
>     blk_aio_complete(acb);
> }
> 
> @@ -1321,10 +1324,11 @@ static void blk_aio_write_entry(void *opaque)
> {
>     BlkAioEmAIOCB *acb = opaque;
>     BlkRwCo *rwco = &acb->rwco;
> +    QEMUIOVector *qiov = rwco->iobuf;
> 
> -    assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
> +    assert(!qiov || qiov->size == acb->bytes);
>     rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
> -                               rwco->qiov, rwco->flags);
> +                               qiov, rwco->flags);
>     blk_aio_complete(acb);
> }
> 
> @@ -1453,8 +1457,10 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
> static void blk_ioctl_entry(void *opaque)
> {
>     BlkRwCo *rwco = opaque;
> +    QEMUIOVector *qiov = rwco->iobuf;
> +
>     rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
> -                             rwco->qiov->iov[0].iov_base);
> +                             qiov->iov[0].iov_base);
> }
> 
> int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
> @@ -1467,24 +1473,15 @@ static void blk_aio_ioctl_entry(void *opaque)
>     BlkAioEmAIOCB *acb = opaque;
>     BlkRwCo *rwco = &acb->rwco;
> 
> -    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
> -                             rwco->qiov->iov[0].iov_base);
> +    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
> +
>     blk_aio_complete(acb);
> }
> 
> BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
>                           BlockCompletionFunc *cb, void *opaque)
> {
> -    QEMUIOVector qiov;
> -    struct iovec iov;
> -
> -    iov = (struct iovec) {
> -        .iov_base = buf,
> -        .iov_len = 0,
> -    };
> -    qemu_iovec_init_external(&qiov, &iov, 1);
> -
> -    return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
> +    return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
> }
> 
> int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
> @@ -1900,7 +1897,9 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
> static void blk_pdiscard_entry(void *opaque)
> {
>     BlkRwCo *rwco = opaque;
> -    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
> +    QEMUIOVector *qiov = rwco->iobuf;
> +
> +    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
> }
> 
> int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
> -- 
> 2.7.4
> 
> 


Re: [Qemu-devel] [PATCH] block: Fix qemu crash when using scsi-block
Posted by Paolo Bonzini 6 years, 4 months ago
On 23/11/2017 18:05, Deepa Srinivasan wrote:
> blk_aio_prwv() now takes a void pointer and the coroutine functions
> have been modified to cast it into QEMUIOVector if needed. It does
> not use an union in BlkRwCo since this leads to code - blk_aio_prwv()
> would have to write to the void pointer member, but coroutines would
> sometimes read the QEMUIOVector member. Paolo also suggested not
> using a union.
> 
> Note that a similar issue exists in
> blk_ioctl()/blk_ioctl_entry()/blk_prw() where blk_prw() always
> creates the QEMUIOVector even if blk_ioctl()/blk_ioctl_entry() does
> not need a QEMUIOVector. This will need to be fixed separately to
> keep it consistent with the AIO path.

For that it's probably simplest to inline blk_prw into blk_ioctl and
remove all the cruft:

diff --git a/block/block-backend.c b/block/block-backend.c
index 45d9101be3..ceab3166bc 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1404,12 +1404,28 @@ static void blk_ioctl_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
     rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
-                             rwco->qiov->iov[0].iov_base);
+                             rwco->iobuf);
 }

 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
-    return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
+    BlkRwCo rwco = (BlkRwCo) {
+        .blk    = blk,
+        .iobuf  = buf,
+        .offset = req,
+        .ret    = NOT_DONE,
+    };
+
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        blk_ioctl_entry(&rwco);
+    } else {
+        Coroutine *co = qemu_coroutine_create(blk_ioctl_entry, &rwco);
+        bdrv_coroutine_enter(blk_bs(blk), co);
+        BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
+    }
+
+    return rwco.ret;
 }

 static void blk_aio_ioctl_entry(void *opaque)

Thanks,

Paolo

Re: [Qemu-devel] [PATCH] block: Fix qemu crash when using scsi-block
Posted by Kevin Wolf 6 years, 4 months ago
Am 23.11.2017 um 18:05 hat Deepa Srinivasan geschrieben:
> blk_aio_prwv() now takes a void pointer and the coroutine functions
> have been modified to cast it into QEMUIOVector if needed. It does not
> use an union in BlkRwCo since this leads to code - blk_aio_prwv()
> would have to write to the void pointer member, but coroutines would
> sometimes read the QEMUIOVector member. Paolo also suggested not using
> a union.

I don't particularly like void pointers, but I guess it's fair enough.

> Note that a similar issue exists in
> blk_ioctl()/blk_ioctl_entry()/blk_prw() where blk_prw() always creates
> the QEMUIOVector even if blk_ioctl()/blk_ioctl_entry() does not need a
> QEMUIOVector. This will need to be fixed separately to keep it
> consistent with the AIO path.

I don't think there is an actual problem in the blk_ioctl() path because
the iov on the stack stays valid as long as the coroutine runs. AIO is
different because it returns before the coroutine has terminated.

Kevin

Re: [Qemu-devel] [PATCH] block: Fix qemu crash when using scsi-block
Posted by Paolo Bonzini 6 years, 4 months ago
On 23/11/2017 18:29, Kevin Wolf wrote:
>> Note that a similar issue exists in
>> blk_ioctl()/blk_ioctl_entry()/blk_prw() where blk_prw() always creates
>> the QEMUIOVector even if blk_ioctl()/blk_ioctl_entry() does not need a
>> QEMUIOVector. This will need to be fixed separately to keep it
>> consistent with the AIO path.
> 
> I don't think there is an actual problem in the blk_ioctl() path because
> the iov on the stack stays valid as long as the coroutine runs. AIO is
> different because it returns before the coroutine has terminated.

I agree, it's just code that is slightly ugly.

Paolo

Re: [Qemu-devel] [PATCH] block: Fix qemu crash when using scsi-block
Posted by Kevin Wolf 6 years, 4 months ago
Am 23.11.2017 um 18:31 hat Paolo Bonzini geschrieben:
> On 23/11/2017 18:29, Kevin Wolf wrote:
> >> Note that a similar issue exists in
> >> blk_ioctl()/blk_ioctl_entry()/blk_prw() where blk_prw() always creates
> >> the QEMUIOVector even if blk_ioctl()/blk_ioctl_entry() does not need a
> >> QEMUIOVector. This will need to be fixed separately to keep it
> >> consistent with the AIO path.
> > 
> > I don't think there is an actual problem in the blk_ioctl() path because
> > the iov on the stack stays valid as long as the coroutine runs. AIO is
> > different because it returns before the coroutine has terminated.
> 
> I agree, it's just code that is slightly ugly.

Slightly. Neither void pointers nor code duplication make it less ugly,
though. So in this case, I'd say: If it ain't broke, don't fix it.

Kevin

Re: [Qemu-devel] [PATCH] block: Fix qemu crash when using scsi-block
Posted by Deepa Srinivasan 6 years, 4 months ago
> On Nov 23, 2017, at 9:29 AM, Kevin Wolf <kwolf@redhat.com> wrote:
> 
> Am 23.11.2017 um 18:05 hat Deepa Srinivasan geschrieben:
>> blk_aio_prwv() now takes a void pointer and the coroutine functions
>> have been modified to cast it into QEMUIOVector if needed. It does not
>> use an union in BlkRwCo since this leads to code - blk_aio_prwv()
>> would have to write to the void pointer member, but coroutines would
>> sometimes read the QEMUIOVector member. Paolo also suggested not using
>> a union.
> 
> I don't particularly like void pointers, but I guess it's fair enough.

Agreed, but if a union were to hold QEMUIOVector* and void* in BlkRwCo, blk_aio_prwv() would always write to void* but some coroutine functions would read from the QEMUIOVector* member. Keeping it as a void pointer is a safer option.

> 
>> Note that a similar issue exists in
>> blk_ioctl()/blk_ioctl_entry()/blk_prw() where blk_prw() always creates
>> the QEMUIOVector even if blk_ioctl()/blk_ioctl_entry() does not need a
>> QEMUIOVector. This will need to be fixed separately to keep it
>> consistent with the AIO path.
> 
> I don't think there is an actual problem in the blk_ioctl() path because
> the iov on the stack stays valid as long as the coroutine runs. AIO is
> different because it returns before the coroutine has terminated.
> 

The problem in blk_ioctl() is not a crash, because blk_prw() waits for the coroutine to complete, as you say.

The issue is that it unnecessarily creates a QEMUIOVector for the ioctl case. I was saying that if this is to be kept consistent with the AIO path, it could be done in a separate patch.

> Kevin
> 


Re: [Qemu-devel] [PATCH] block: Fix qemu crash when using scsi-block
Posted by Deepa Srinivasan 6 years, 4 months ago
Kevin, Paolo, Stefan,

Are there any further comments on this patch? Can this patch be committed?

Thanks
Deepa

> On Nov 23, 2017, at 8:55 AM, Deepa Srinivasan <deepa.srinivasan@oracle.com> wrote:
> 
> Starting qemu with the following arguments causes qemu to segfault:
> ... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name=
> iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1
> 
> This patch fixes blk_aio_ioctl() so it does not pass stack addresses to
> blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More
> details about the bug follow.
> 
> blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the
> coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter().
> 
> When blk_aio_ioctl() is executed from within a coroutine context (e.g.
> iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to
> the current coroutine's wakeup queue. blk_aio_ioctl() then returns.
> 
> When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer:
> ....
>    BlkRwCo *rwco = &acb->rwco;
> 
>    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
>                             rwco->qiov->iov[0].iov_base);  <--- qiov is
>                                                                 invalid here
> ...
> 
> In the case when blk_aio_ioctl() is called from a non-coroutine context,
> blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls
> qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine
> execution is complete, control returns to blk_aio_ioctl_entry() after the call
> to blk_co_ioctl(). There is no invalid reference after this point, but the
> function is still holding on to invalid pointers.
> 
> The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer
> rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the
> coroutine function casts it to QEMUIOVector or uses the void pointer directly.
> 
> Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com>
> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Reviewed-by: Mark Kanda <mark.kanda@oracle.com>
> ---
> block/block-backend.c | 51 +++++++++++++++++++++++++--------------------------
> 1 file changed, 25 insertions(+), 26 deletions(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index baef8e7..2d0d9b6 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1140,7 +1140,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
> typedef struct BlkRwCo {
>     BlockBackend *blk;
>     int64_t offset;
> -    QEMUIOVector *qiov;
> +    void *iobuf;
>     int ret;
>     BdrvRequestFlags flags;
> } BlkRwCo;
> @@ -1148,17 +1148,19 @@ typedef struct BlkRwCo {
> static void blk_read_entry(void *opaque)
> {
>     BlkRwCo *rwco = opaque;
> +    QEMUIOVector *qiov = rwco->iobuf;
> 
> -    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
> -                              rwco->qiov, rwco->flags);
> +    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
> +                              qiov, rwco->flags);
> }
> 
> static void blk_write_entry(void *opaque)
> {
>     BlkRwCo *rwco = opaque;
> +    QEMUIOVector *qiov = rwco->iobuf;
> 
> -    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
> -                               rwco->qiov, rwco->flags);
> +    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
> +                               qiov, rwco->flags);
> }
> 
> static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
> @@ -1178,7 +1180,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
>     rwco = (BlkRwCo) {
>         .blk    = blk,
>         .offset = offset,
> -        .qiov   = &qiov,
> +        .iobuf  = &qiov,
>         .flags  = flags,
>         .ret    = NOT_DONE,
>     };
> @@ -1275,7 +1277,7 @@ static void blk_aio_complete_bh(void *opaque)
> }
> 
> static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
> -                                QEMUIOVector *qiov, CoroutineEntry co_entry,
> +                                void *iobuf, CoroutineEntry co_entry,
>                                 BdrvRequestFlags flags,
>                                 BlockCompletionFunc *cb, void *opaque)
> {
> @@ -1287,7 +1289,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
>     acb->rwco = (BlkRwCo) {
>         .blk    = blk,
>         .offset = offset,
> -        .qiov   = qiov,
> +        .iobuf  = iobuf,
>         .flags  = flags,
>         .ret    = NOT_DONE,
>     };
> @@ -1310,10 +1312,11 @@ static void blk_aio_read_entry(void *opaque)
> {
>     BlkAioEmAIOCB *acb = opaque;
>     BlkRwCo *rwco = &acb->rwco;
> +    QEMUIOVector *qiov = rwco->iobuf;
> 
> -    assert(rwco->qiov->size == acb->bytes);
> +    assert(qiov->size == acb->bytes);
>     rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
> -                              rwco->qiov, rwco->flags);
> +                              qiov, rwco->flags);
>     blk_aio_complete(acb);
> }
> 
> @@ -1321,10 +1324,11 @@ static void blk_aio_write_entry(void *opaque)
> {
>     BlkAioEmAIOCB *acb = opaque;
>     BlkRwCo *rwco = &acb->rwco;
> +    QEMUIOVector *qiov = rwco->iobuf;
> 
> -    assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
> +    assert(!qiov || qiov->size == acb->bytes);
>     rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
> -                               rwco->qiov, rwco->flags);
> +                               qiov, rwco->flags);
>     blk_aio_complete(acb);
> }
> 
> @@ -1453,8 +1457,10 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
> static void blk_ioctl_entry(void *opaque)
> {
>     BlkRwCo *rwco = opaque;
> +    QEMUIOVector *qiov = rwco->iobuf;
> +
>     rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
> -                             rwco->qiov->iov[0].iov_base);
> +                             qiov->iov[0].iov_base);
> }
> 
> int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
> @@ -1467,24 +1473,15 @@ static void blk_aio_ioctl_entry(void *opaque)
>     BlkAioEmAIOCB *acb = opaque;
>     BlkRwCo *rwco = &acb->rwco;
> 
> -    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
> -                             rwco->qiov->iov[0].iov_base);
> +    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
> +
>     blk_aio_complete(acb);
> }
> 
> BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
>                           BlockCompletionFunc *cb, void *opaque)
> {
> -    QEMUIOVector qiov;
> -    struct iovec iov;
> -
> -    iov = (struct iovec) {
> -        .iov_base = buf,
> -        .iov_len = 0,
> -    };
> -    qemu_iovec_init_external(&qiov, &iov, 1);
> -
> -    return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
> +    return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
> }
> 
> int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
> @@ -1900,7 +1897,9 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
> static void blk_pdiscard_entry(void *opaque)
> {
>     BlkRwCo *rwco = opaque;
> -    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
> +    QEMUIOVector *qiov = rwco->iobuf;
> +
> +    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
> }
> 
> int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
> -- 
> 2.7.4
> 
>