Signed-off-by: Lin Ma <lma@suse.com>
---
hw/scsi/scsi-disk.c | 90 ++++++++++++++++++++++++++++++++++++++
include/block/accounting.h | 1 +
include/scsi/constants.h | 1 +
3 files changed, 92 insertions(+)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 387503e11b..9e3002ddaf 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -1866,6 +1866,89 @@ static void scsi_disk_emulate_write_data(SCSIRequest *req)
}
}
+typedef struct GetLbaStatusCBData {
+ uint32_t num_blocks;
+ uint32_t is_deallocated;
+ SCSIDiskReq *r;
+} GetLbaStatusCBData;
+
+static void scsi_get_lba_status_complete(void *opaque, int ret);
+
+static void scsi_get_lba_status_complete_noio(GetLbaStatusCBData *data, int ret)
+{
+ SCSIDiskReq *r = data->r;
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+ assert(r->req.aiocb == NULL);
+
+ block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
+ s->qdev.blocksize, BLOCK_ACCT_GET_LBA_STATUS);
+
+ r->req.aiocb = blk_aio_get_lba_status(s->qdev.conf.blk,
+ r->req.cmd.lba * s->qdev.blocksize,
+ s->qdev.blocksize,
+ scsi_get_lba_status_complete, data);
+}
+
+static void scsi_get_lba_status_complete(void *opaque, int ret)
+{
+ GetLbaStatusCBData *data = opaque;
+ SCSIDiskReq *r = data->r;
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+ assert(r->req.aiocb != NULL);
+ r->req.aiocb = NULL;
+
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+ if (scsi_disk_req_check_error(r, ret, true)) {
+ g_free(data);
+ goto done;
+ }
+
+ block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
+ scsi_req_unref(&r->req);
+ g_free(data);
+
+done:
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
+}
+
+static void scsi_disk_emulate_get_lba_status(SCSIRequest *req, uint8_t *outbuf)
+{
+ SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
+ GetLbaStatusCBData *data;
+ uint32_t *num_blocks;
+ uint32_t *is_deallocated;
+
+ data = g_new0(GetLbaStatusCBData, 1);
+ data->r = r;
+ num_blocks = &(data->num_blocks);
+ is_deallocated = &(data->is_deallocated);
+
+ scsi_req_ref(&r->req);
+ scsi_get_lba_status_complete_noio(data, 0);
+
+ /*
+ * 8 + 16 is the length in bytes of response header and
+ * one LBA status descriptor
+ */
+ memset(outbuf, 0, 8 + 16);
+ outbuf[3] = 20;
+ outbuf[8] = (req->cmd.lba >> 56) & 0xff;
+ outbuf[9] = (req->cmd.lba >> 48) & 0xff;
+ outbuf[10] = (req->cmd.lba >> 40) & 0xff;
+ outbuf[11] = (req->cmd.lba >> 32) & 0xff;
+ outbuf[12] = (req->cmd.lba >> 24) & 0xff;
+ outbuf[13] = (req->cmd.lba >> 16) & 0xff;
+ outbuf[14] = (req->cmd.lba >> 8) & 0xff;
+ outbuf[15] = req->cmd.lba & 0xff;
+ outbuf[16] = (*num_blocks >> 24) & 0xff;
+ outbuf[17] = (*num_blocks >> 16) & 0xff;
+ outbuf[18] = (*num_blocks >> 8) & 0xff;
+ outbuf[19] = *num_blocks & 0xff;
+ outbuf[20] = *is_deallocated ? 1 : 0;
+}
+
static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
{
SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
@@ -2076,6 +2159,13 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
/* Protection, exponent and lowest lba field left blank. */
break;
+ } else if ((req->cmd.buf[1] & 31) == SAI_GET_LBA_STATUS) {
+ if (req->cmd.lba > s->qdev.max_lba) {
+ goto illegal_lba;
+ }
+ scsi_disk_emulate_get_lba_status(req, outbuf);
+ r->iov.iov_len = req->cmd.xfer;
+ return r->iov.iov_len;
}
trace_scsi_disk_emulate_command_SAI_unsupported();
goto illegal_request;
diff --git a/include/block/accounting.h b/include/block/accounting.h
index 878b4c3581..645014fb0b 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h
@@ -38,6 +38,7 @@ enum BlockAcctType {
BLOCK_ACCT_WRITE,
BLOCK_ACCT_FLUSH,
BLOCK_ACCT_UNMAP,
+ BLOCK_ACCT_GET_LBA_STATUS,
BLOCK_MAX_IOTYPE,
};
diff --git a/include/scsi/constants.h b/include/scsi/constants.h
index 874176019e..b18377b214 100644
--- a/include/scsi/constants.h
+++ b/include/scsi/constants.h
@@ -154,6 +154,7 @@
* SERVICE ACTION IN subcodes
*/
#define SAI_READ_CAPACITY_16 0x10
+#define SAI_GET_LBA_STATUS 0x12
/*
* READ POSITION service action codes
--
2.26.0
On Wed, Jun 17, 2020 at 06:30:18PM +0800, Lin Ma wrote:
> Signed-off-by: Lin Ma <lma@suse.com>
> ---
> hw/scsi/scsi-disk.c | 90 ++++++++++++++++++++++++++++++++++++++
> include/block/accounting.h | 1 +
> include/scsi/constants.h | 1 +
> 3 files changed, 92 insertions(+)
>
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index 387503e11b..9e3002ddaf 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -1866,6 +1866,89 @@ static void scsi_disk_emulate_write_data(SCSIRequest *req)
> }
> }
>
> +typedef struct GetLbaStatusCBData {
> + uint32_t num_blocks;
> + uint32_t is_deallocated;
> + SCSIDiskReq *r;
> +} GetLbaStatusCBData;
> +
> +static void scsi_get_lba_status_complete(void *opaque, int ret);
> +
> +static void scsi_get_lba_status_complete_noio(GetLbaStatusCBData *data, int ret)
> +{
> + SCSIDiskReq *r = data->r;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> + assert(r->req.aiocb == NULL);
> +
> + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
> + s->qdev.blocksize, BLOCK_ACCT_GET_LBA_STATUS);
> +
> + r->req.aiocb = blk_aio_get_lba_status(s->qdev.conf.blk,
> + r->req.cmd.lba * s->qdev.blocksize,
> + s->qdev.blocksize,
> + scsi_get_lba_status_complete, data);
> +}
> +
> +static void scsi_get_lba_status_complete(void *opaque, int ret)
> +{
> + GetLbaStatusCBData *data = opaque;
> + SCSIDiskReq *r = data->r;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> + assert(r->req.aiocb != NULL);
> + r->req.aiocb = NULL;
> +
> + aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
> + if (scsi_disk_req_check_error(r, ret, true)) {
> + g_free(data);
> + goto done;
> + }
> +
> + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
> + scsi_req_unref(&r->req);
> + g_free(data);
> +
> +done:
> + aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
> +}
> +
> +static void scsi_disk_emulate_get_lba_status(SCSIRequest *req, uint8_t *outbuf)
> +{
> + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> + GetLbaStatusCBData *data;
> + uint32_t *num_blocks;
> + uint32_t *is_deallocated;
> +
> + data = g_new0(GetLbaStatusCBData, 1);
> + data->r = r;
> + num_blocks = &(data->num_blocks);
> + is_deallocated = &(data->is_deallocated);
> +
> + scsi_req_ref(&r->req);
> + scsi_get_lba_status_complete_noio(data, 0);
scsi_get_lba_status_complete_noio() looks asynchronous. If the
BlockDriver yields in .bdrv_co_block_status() then the operation has not
completed yet when scsi_get_lba_status_complete_noio() returns. It is
not safe to access the GetLbaStatusCBData data until the async operation
is complete.
Also, scsi_get_lba_status_complete() calls g_free(data) so there is a
use-after-free when *num_blocks and *is_deallocated are accessed.
These issues can be solved by making this code asynchronous (similar to
read/write/flush/discard_zeroes/ioctl). outbuf[] will be filled in in
the completion function before g_free(data) is called.
> +
> + /*
> + * 8 + 16 is the length in bytes of response header and
> + * one LBA status descriptor
> + */
> + memset(outbuf, 0, 8 + 16);
> + outbuf[3] = 20;
> + outbuf[8] = (req->cmd.lba >> 56) & 0xff;
> + outbuf[9] = (req->cmd.lba >> 48) & 0xff;
> + outbuf[10] = (req->cmd.lba >> 40) & 0xff;
> + outbuf[11] = (req->cmd.lba >> 32) & 0xff;
> + outbuf[12] = (req->cmd.lba >> 24) & 0xff;
> + outbuf[13] = (req->cmd.lba >> 16) & 0xff;
> + outbuf[14] = (req->cmd.lba >> 8) & 0xff;
> + outbuf[15] = req->cmd.lba & 0xff;
> + outbuf[16] = (*num_blocks >> 24) & 0xff;
> + outbuf[17] = (*num_blocks >> 16) & 0xff;
> + outbuf[18] = (*num_blocks >> 8) & 0xff;
> + outbuf[19] = *num_blocks & 0xff;
> + outbuf[20] = *is_deallocated ? 1 : 0;
SCSI defines 3 values and QEMU can represent all of them:
0 - mapped or unknown
1 - deallocated
2 - anchored
See the BDRV_BLOCK_* constants in include/block/block.h. The
is_deallocated boolean is not enough to represent this state, but the
bdrv_block_status() return value can be used instead.
> +}
> +
> static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
> {
> SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> @@ -2076,6 +2159,13 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
>
> /* Protection, exponent and lowest lba field left blank. */
> break;
> + } else if ((req->cmd.buf[1] & 31) == SAI_GET_LBA_STATUS) {
> + if (req->cmd.lba > s->qdev.max_lba) {
> + goto illegal_lba;
> + }
> + scsi_disk_emulate_get_lba_status(req, outbuf);
> + r->iov.iov_len = req->cmd.xfer;
> + return r->iov.iov_len;
Is there something tricky going on here with iov_len that prevents us
from using break here and sharing the function's normal return code path?
> }
> trace_scsi_disk_emulate_command_SAI_unsupported();
> goto illegal_request;
> diff --git a/include/block/accounting.h b/include/block/accounting.h
> index 878b4c3581..645014fb0b 100644
> --- a/include/block/accounting.h
> +++ b/include/block/accounting.h
> @@ -38,6 +38,7 @@ enum BlockAcctType {
> BLOCK_ACCT_WRITE,
> BLOCK_ACCT_FLUSH,
> BLOCK_ACCT_UNMAP,
> + BLOCK_ACCT_GET_LBA_STATUS,
> BLOCK_MAX_IOTYPE,
> };
>
> diff --git a/include/scsi/constants.h b/include/scsi/constants.h
> index 874176019e..b18377b214 100644
> --- a/include/scsi/constants.h
> +++ b/include/scsi/constants.h
> @@ -154,6 +154,7 @@
> * SERVICE ACTION IN subcodes
> */
> #define SAI_READ_CAPACITY_16 0x10
> +#define SAI_GET_LBA_STATUS 0x12
>
> /*
> * READ POSITION service action codes
> --
> 2.26.0
>
> -----邮件原件-----
> 发件人: Stefan Hajnoczi <stefanha@redhat.com>
> 发送时间: 2020年6月22日 20:14
> 收件人: Lin Ma <LMa@suse.com>
> 抄送: qemu-devel@nongnu.org; fam@euphon.net; kwolf@redhat.com;
> mreitz@redhat.com; pbonzini@redhat.com
> 主题: Re: [PATCH v2 3/3] scsi-disk: Add support for the GET LBA STATUS 16
> command
>
> On Wed, Jun 17, 2020 at 06:30:18PM +0800, Lin Ma wrote:
> > Signed-off-by: Lin Ma <lma@suse.com>
> > ---
> > hw/scsi/scsi-disk.c | 90
> ++++++++++++++++++++++++++++++++++++++
> > include/block/accounting.h | 1 +
> > include/scsi/constants.h | 1 +
> > 3 files changed, 92 insertions(+)
> >
> > diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index
> > 387503e11b..9e3002ddaf 100644
> > --- a/hw/scsi/scsi-disk.c
> > +++ b/hw/scsi/scsi-disk.c
> > @@ -1866,6 +1866,89 @@ static void
> scsi_disk_emulate_write_data(SCSIRequest *req)
> > }
> > }
> >
> > +typedef struct GetLbaStatusCBData {
> > + uint32_t num_blocks;
> > + uint32_t is_deallocated;
> > + SCSIDiskReq *r;
> > +} GetLbaStatusCBData;
> > +
> > +static void scsi_get_lba_status_complete(void *opaque, int ret);
> > +
> > +static void scsi_get_lba_status_complete_noio(GetLbaStatusCBData
> > +*data, int ret) {
> > + SCSIDiskReq *r = data->r;
> > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> > +
> > + assert(r->req.aiocb == NULL);
> > +
> > + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
> > + s->qdev.blocksize,
> BLOCK_ACCT_GET_LBA_STATUS);
> > +
> > + r->req.aiocb = blk_aio_get_lba_status(s->qdev.conf.blk,
> > + r->req.cmd.lba *
> s->qdev.blocksize,
> > + s->qdev.blocksize,
> > +
> > +scsi_get_lba_status_complete, data); }
> > +
> > +static void scsi_get_lba_status_complete(void *opaque, int ret) {
> > + GetLbaStatusCBData *data = opaque;
> > + SCSIDiskReq *r = data->r;
> > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> > +
> > + assert(r->req.aiocb != NULL);
> > + r->req.aiocb = NULL;
> > +
> > + aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
> > + if (scsi_disk_req_check_error(r, ret, true)) {
> > + g_free(data);
> > + goto done;
> > + }
> > +
> > + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
> > + scsi_req_unref(&r->req);
> > + g_free(data);
> > +
> > +done:
> > + aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
> > +}
> > +
> > +static void scsi_disk_emulate_get_lba_status(SCSIRequest *req,
> > +uint8_t *outbuf) {
> > + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> > + GetLbaStatusCBData *data;
> > + uint32_t *num_blocks;
> > + uint32_t *is_deallocated;
> > +
> > + data = g_new0(GetLbaStatusCBData, 1);
> > + data->r = r;
> > + num_blocks = &(data->num_blocks);
> > + is_deallocated = &(data->is_deallocated);
> > +
> > + scsi_req_ref(&r->req);
> > + scsi_get_lba_status_complete_noio(data, 0);
>
> scsi_get_lba_status_complete_noio() looks asynchronous. If the BlockDriver
> yields in .bdrv_co_block_status() then the operation has not completed yet
> when scsi_get_lba_status_complete_noio() returns. It is not safe to access the
> GetLbaStatusCBData data until the async operation is complete.
>
> Also, scsi_get_lba_status_complete() calls g_free(data) so there is a
> use-after-free when *num_blocks and *is_deallocated are accessed.
Got it, I'll fill the outbuf[] in the completion function in V3.
> These issues can be solved by making this code asynchronous (similar to
> read/write/flush/discard_zeroes/ioctl). outbuf[] will be filled in in the completion
> function before g_free(data) is called.
I looked into block/io.c. 'bdrv_co_pdiscard()', 'bdrv_co_block_status()' and
'bdrv_co_flush()' look similar: each calls the corresponding bs->drv->bdrv_co_*()
or bs->drv->bdrv_aio_*() between a pair of blk_inc/dec_in_flight() calls:
'bdrv_co_pdiscard()' calls bs->drv->bdrv_co_pdiscard() or bs->drv->bdrv_aio_pdiscard().
'bdrv_co_flush()' calls bs->drv->bdrv_co_flush*() or bs->drv->bdrv_aio_flush().
'bdrv_co_block_status()' calls bs->drv->bdrv_co_block_status(). QEMU has only the
coroutine version of block_status; there is no aio version.
About "making this code asynchronous": in fact, I haven't yet understood where the issue is.
If you mean making 'bdrv_co_get_lba_status()' asynchronous, how about
directly calling the coroutine-based 'bdrv_co_block_status()' instead of 'bdrv_block_status()' in it?
Or could you please give more detailed information?
BTW, IMO the existing BlockDriver->bdrv_co_block_status() is enough; it's not necessary to
implement a drv->bdrv_aio_get_block_status() in BlockDrivers (say qcow2 or raw). Am I right?
I'm not familiar with the QEMU block layer and coroutines; sorry for any inconvenience.
> > +
> > + /*
> > + * 8 + 16 is the length in bytes of response header and
> > + * one LBA status descriptor
> > + */
> > + memset(outbuf, 0, 8 + 16);
> > + outbuf[3] = 20;
> > + outbuf[8] = (req->cmd.lba >> 56) & 0xff;
> > + outbuf[9] = (req->cmd.lba >> 48) & 0xff;
> > + outbuf[10] = (req->cmd.lba >> 40) & 0xff;
> > + outbuf[11] = (req->cmd.lba >> 32) & 0xff;
> > + outbuf[12] = (req->cmd.lba >> 24) & 0xff;
> > + outbuf[13] = (req->cmd.lba >> 16) & 0xff;
> > + outbuf[14] = (req->cmd.lba >> 8) & 0xff;
> > + outbuf[15] = req->cmd.lba & 0xff;
> > + outbuf[16] = (*num_blocks >> 24) & 0xff;
> > + outbuf[17] = (*num_blocks >> 16) & 0xff;
> > + outbuf[18] = (*num_blocks >> 8) & 0xff;
> > + outbuf[19] = *num_blocks & 0xff;
> > + outbuf[20] = *is_deallocated ? 1 : 0;
>
> SCSI defines 3 values and QEMU can represent all of them:
>
> 0 - mapped or unknown
> 1 - deallocated
> 2 - anchored
>
> See the BDRV_BLOCK_* constants in include/block/block.h. The is_deallocated
> boolean is not enough to represent this state, but the
> bdrv_block_status() return value can be used instead.
>
> > +}
> > +
> > static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t
> > *buf) {
> > SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); @@ -2076,6
> > +2159,13 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req,
> > uint8_t *buf)
> >
> > /* Protection, exponent and lowest lba field left blank. */
> > break;
> > + } else if ((req->cmd.buf[1] & 31) == SAI_GET_LBA_STATUS) {
> > + if (req->cmd.lba > s->qdev.max_lba) {
> > + goto illegal_lba;
> > + }
> > + scsi_disk_emulate_get_lba_status(req, outbuf);
> > + r->iov.iov_len = req->cmd.xfer;
> > + return r->iov.iov_len;
>
> Is there something tricky going on here with iov_len that prevents us from using
> break here and sharing the functions normal return code path?
>
> > }
> > trace_scsi_disk_emulate_command_SAI_unsupported();
> > goto illegal_request;
> > diff --git a/include/block/accounting.h b/include/block/accounting.h
> > index 878b4c3581..645014fb0b 100644
> > --- a/include/block/accounting.h
> > +++ b/include/block/accounting.h
> > @@ -38,6 +38,7 @@ enum BlockAcctType {
> > BLOCK_ACCT_WRITE,
> > BLOCK_ACCT_FLUSH,
> > BLOCK_ACCT_UNMAP,
> > + BLOCK_ACCT_GET_LBA_STATUS,
> > BLOCK_MAX_IOTYPE,
> > };
> >
> > diff --git a/include/scsi/constants.h b/include/scsi/constants.h index
> > 874176019e..b18377b214 100644
> > --- a/include/scsi/constants.h
> > +++ b/include/scsi/constants.h
> > @@ -154,6 +154,7 @@
> > * SERVICE ACTION IN subcodes
> > */
> > #define SAI_READ_CAPACITY_16 0x10
> > +#define SAI_GET_LBA_STATUS 0x12
> >
> > /*
> > * READ POSITION service action codes
> > --
> > 2.26.0
> >
On Mon, Jun 29, 2020 at 09:18:59AM +0000, Lin Ma wrote:
>
>
> > -----邮件原件-----
> > 发件人: Stefan Hajnoczi <stefanha@redhat.com>
> > 发送时间: 2020年6月22日 20:14
> > 收件人: Lin Ma <LMa@suse.com>
> > 抄送: qemu-devel@nongnu.org; fam@euphon.net; kwolf@redhat.com;
> > mreitz@redhat.com; pbonzini@redhat.com
> > 主题: Re: [PATCH v2 3/3] scsi-disk: Add support for the GET LBA STATUS 16
> > command
> >
> > On Wed, Jun 17, 2020 at 06:30:18PM +0800, Lin Ma wrote:
> > > Signed-off-by: Lin Ma <lma@suse.com>
> > > ---
> > > hw/scsi/scsi-disk.c | 90
> > ++++++++++++++++++++++++++++++++++++++
> > > include/block/accounting.h | 1 +
> > > include/scsi/constants.h | 1 +
> > > 3 files changed, 92 insertions(+)
> > >
> > > diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index
> > > 387503e11b..9e3002ddaf 100644
> > > --- a/hw/scsi/scsi-disk.c
> > > +++ b/hw/scsi/scsi-disk.c
> > > @@ -1866,6 +1866,89 @@ static void
> > scsi_disk_emulate_write_data(SCSIRequest *req)
> > > }
> > > }
> > >
> > > +typedef struct GetLbaStatusCBData {
> > > + uint32_t num_blocks;
> > > + uint32_t is_deallocated;
> > > + SCSIDiskReq *r;
> > > +} GetLbaStatusCBData;
> > > +
> > > +static void scsi_get_lba_status_complete(void *opaque, int ret);
> > > +
> > > +static void scsi_get_lba_status_complete_noio(GetLbaStatusCBData
> > > +*data, int ret) {
> > > + SCSIDiskReq *r = data->r;
> > > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> > > +
> > > + assert(r->req.aiocb == NULL);
> > > +
> > > + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
> > > + s->qdev.blocksize,
> > BLOCK_ACCT_GET_LBA_STATUS);
> > > +
> > > + r->req.aiocb = blk_aio_get_lba_status(s->qdev.conf.blk,
> > > + r->req.cmd.lba *
> > s->qdev.blocksize,
> > > + s->qdev.blocksize,
> > > +
> > > +scsi_get_lba_status_complete, data); }
> > > +
> > > +static void scsi_get_lba_status_complete(void *opaque, int ret) {
> > > + GetLbaStatusCBData *data = opaque;
> > > + SCSIDiskReq *r = data->r;
> > > + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> > > +
> > > + assert(r->req.aiocb != NULL);
> > > + r->req.aiocb = NULL;
> > > +
> > > + aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
> > > + if (scsi_disk_req_check_error(r, ret, true)) {
> > > + g_free(data);
> > > + goto done;
> > > + }
> > > +
> > > + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
> > > + scsi_req_unref(&r->req);
> > > + g_free(data);
> > > +
> > > +done:
> > > + aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
> > > +}
> > > +
> > > +static void scsi_disk_emulate_get_lba_status(SCSIRequest *req,
> > > +uint8_t *outbuf) {
> > > + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> > > + GetLbaStatusCBData *data;
> > > + uint32_t *num_blocks;
> > > + uint32_t *is_deallocated;
> > > +
> > > + data = g_new0(GetLbaStatusCBData, 1);
> > > + data->r = r;
> > > + num_blocks = &(data->num_blocks);
> > > + is_deallocated = &(data->is_deallocated);
> > > +
> > > + scsi_req_ref(&r->req);
> > > + scsi_get_lba_status_complete_noio(data, 0);
> >
> > scsi_get_lba_status_complete_noio() looks asynchronous. If the BlockDriver
> > yields in .bdrv_co_block_status() then the operation has not completed yet
> > when scsi_get_lba_status_complete_noio() returns. It is not safe to access the
> > GetLbaStatusCBData data until the async operation is complete.
> >
> > Also, scsi_get_lba_status_complete() calls g_free(data) so there is a
> > use-after-free when *num_blocks and *is_deallocated are accessed.
>
> Got it, I'll fill the outbuf[] in the completion function in V3.
>
> > These issues can be solved by making this code asynchronous (similar to
> > read/write/flush/discard_zeroes/ioctl). outbuf[] will be filled in in the completion
> > function before g_free(data) is called.
>
> I looked into block/io.c, The 'bdrv_co_pdiscard()', the 'bdrv_co_block_status' and the
> 'bdrv_co_flush()', They look similiar, They called corresponding bs->drv->bdrv_co_*()
> or the bs->drv->bdrv_aio_*() between pair of blk_inc/dec_in_flight():
> The 'bdrv_co_pdiscard()' calls bs->drv->bdrv_co_pdiscard() or bs->drv->bdrv_aio_pdiscard()
> The 'bdrv_co_flush()' calls bs->drv->bdrv_co_flush*() or bs->drv->bdrv_aio_flush().
> The 'bdrv_co_block_status' calls bs->drv->bdrv_co_block_status(). qemu contains the
> coroutine version of block_status, no aio version of block_status.
>
> About "making this code asynchronous", Well, In fact I havn't realized yet where the issue is.
> If what you mean is that make the 'bdrv_co_get_lba_status()' asynchronous, How about
> directly calling coroutine-based 'bdrv_co_block_status()' instead of 'bdrv_block_status()' in it?
> Or could you please suggest more detailed information?
> BTW, IMO the existing BlockDriver->bdrv_co_block_status() is enough, It's not necessary to
> implement a drv->bdrv_aio_get_block_status() in BlockDrivers(say qcow2 or raw), Am I right?
scsi_disk_emulate_get_lba_status() is called outside coroutine context.
It is expected to return without blocking so that other activity can
continue (e.g. the vCPU can continue execution).
scsi_disk_emulate_get_lba_status() cannot fill in outbuf because we may
not have fetched the LBA status yet when it needs to return.
Luckily there are other SCSI commands in scsi_disk_emulate_command()
that are asynchronous. You can follow their model:
case SYNCHRONIZE_CACHE:
/* The request is used as the AIO opaque value, so add a ref. */
scsi_req_ref(&r->req);
block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
BLOCK_ACCT_FLUSH);
r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
return 0;
The request is not completed by scsi_disk_emulate_command(). Instead
blk_aio_flush() launches a flush operation and the SCSI request is
passed along as the argument to the scsi_aio_complete() completion
function.
Something similar is needed for GET_LBA_STATUS. Since there is no
bdrv_aio_block_status() you can create a coroutine instead of an aiocb:
static void coroutine_fn scsi_co_block_status(void *opaque)
{
int ret;
aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
ret = bdrv_co_block_status(...);
...fill in outbuf...
scsi_req_complete(&r->req, GOOD);
aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
scsi_req_unref(&r->req);
}
...in scsi_disk_emulate_command()...
scsi_req_ref(&r->req);
co = qemu_coroutine_create(scsi_co_block_status, r);
aio_co_schedule(blk_get_aio_context(s->qdev.conf.blk), co);
return 0;
This is just a sketch, I haven't checked the details. The trickiest
issue is probably how to deal with r->req.aiocb, which is normally set
for async requests. It will be necessary to study the code to figure out
a solution because there is no BlockAIOCB in this case (we're using a
coroutine instead).
Stefan
On 08/07/20 14:29, Stefan Hajnoczi wrote:
> Something similar is needed for GET_LBA_STATUS. Since there is no
> bdrv_aio_block_status() you can create a coroutine instead of an aiocb:
>
> static void coroutine_fn scsi_co_block_status(void *opaque)
> {
> int ret;
>
> aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
>
> ret = bdrv_co_block_status(...);
>
> ...fill in outbuf...
>
> scsi_req_complete(&r->req, GOOD);
>
> aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
>
> scsi_req_unref(&r->req);
> }
>
> ...in scsi_disk_emulate_command()...
> scsi_req_ref(&r->req);
> co = qemu_coroutine_create(scsi_co_block_status, r);
> aio_co_schedule(blk_get_aio_context(s->qdev.conf.blk), co);
> return 0;
>
> This is just a sketch, I haven't checked the details. The trickiest
> issue is probably how to deal with r->req.aiocb, which is normally set
> for async requests. It will be necessary to study the code to figure out
> a solution because there is no BlockAIOCB in this case (we're using a
> coroutine instead).
It's probably simplest to put the code above in block/block-backend.c,
in the form of blk_aio_block_status which would follow what is done in
blk_aio_prwv.
Paolo
© 2016 - 2026 Red Hat, Inc.