block/rbd.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-)
From: tianqing <tianqing@unitedstack.com>
Rbd can do readv and writev directly, so wo do not need to transform
iov to buf or vice versa any more.
Signed-off-by: tianqing <tianqing@unitedstack.com>
---
block/rbd.c | 49 ++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 42 insertions(+), 7 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index a57b3e3..75ae1d6 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -47,7 +47,7 @@
*/
/* rbd_aio_discard added in 0.1.2 */
-#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
+#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0)
#define LIBRBD_SUPPORTS_DISCARD
#else
#undef LIBRBD_SUPPORTS_DISCARD
@@ -73,7 +73,12 @@ typedef struct RBDAIOCB {
BlockAIOCB common;
int64_t ret;
QEMUIOVector *qiov;
+/* Note:
+ * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h.
+ */
+#ifndef LIBRBD_SUPPORTS_IOVEC
char *bounce;
+#endif
RBDAIOCmd cmd;
int error;
struct BDRVRBDState *s;
@@ -83,7 +88,9 @@ typedef struct RADOSCB {
RBDAIOCB *acb;
struct BDRVRBDState *s;
int64_t size;
+#ifndef LIBRBD_SUPPORTS_IOVEC
char *buf;
+#endif
int64_t ret;
} RADOSCB;
@@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
}
} else {
if (r < 0) {
+#ifndef LIBRBD_SUPPORTS_IOVEC
memset(rcb->buf, 0, rcb->size);
+#else
+ iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size);
+#endif
acb->ret = r;
acb->error = 1;
} else if (r < rcb->size) {
+#ifndef LIBRBD_SUPPORTS_IOVEC
memset(rcb->buf + r, 0, rcb->size - r);
+#else
+ iov_memset(acb->qiov->iov, acb->qiov->niov,
+ r, 0, acb->qiov->size - r);
+#endif
+
if (!acb->error) {
acb->ret = rcb->size;
}
@@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
g_free(rcb);
+#ifndef LIBRBD_SUPPORTS_IOVEC
if (acb->cmd == RBD_AIO_READ) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
}
qemu_vfree(acb->bounce);
+#endif
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
qemu_aio_unref(acb);
@@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
RBDAIOCB *acb;
RADOSCB *rcb = NULL;
rbd_completion_t c;
- char *buf;
int r;
+#ifndef LIBRBD_SUPPORTS_IOVEC
+ char *buf = NULL;
+#endif
BDRVRBDState *s = bs->opaque;
@@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
acb->cmd = cmd;
acb->qiov = qiov;
assert(!qiov || qiov->size == size);
+#ifndef LIBRBD_SUPPORTS_IOVEC
+
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
@@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
goto failed;
}
}
- acb->ret = 0;
- acb->error = 0;
- acb->s = s;
-
if (cmd == RBD_AIO_WRITE) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
}
-
buf = acb->bounce;
+#endif
+ acb->ret = 0;
+ acb->error = 0;
+ acb->s = s;
rcb = g_new(RADOSCB, 1);
+
rcb->acb = acb;
+#ifndef LIBRBD_SUPPORTS_IOVEC
rcb->buf = buf;
+#endif
rcb->s = acb->s;
rcb->size = size;
r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
@@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
switch (cmd) {
case RBD_AIO_WRITE:
+#ifndef LIBRBD_SUPPORTS_IOVEC
r = rbd_aio_write(s->image, off, size, buf, c);
+#else
+ r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
+#endif
break;
case RBD_AIO_READ:
+#ifndef LIBRBD_SUPPORTS_IOVEC
r = rbd_aio_read(s->image, off, size, buf, c);
+#else
+ r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
+#endif
break;
case RBD_AIO_DISCARD:
r = rbd_aio_discard_wrapper(s->image, off, size, c);
@@ -719,7 +752,9 @@ failed_completion:
rbd_aio_release(c);
failed:
g_free(rcb);
+#ifndef LIBRBD_SUPPORTS_IOVEC
qemu_vfree(acb->bounce);
+#endif
qemu_aio_unref(acb);
return NULL;
}
--
2.10.2
Tianqing, Do we have any performance data for this patch? Thanks. Tiger > 在 2017年2月16日,下午5:00,jazeltq@gmail.com 写道: > > From: tianqing <tianqing@unitedstack.com> > > Rbd can do readv and writev directly, so wo do not need to transform > iov to buf or vice versa any more. > > Signed-off-by: tianqing <tianqing@unitedstack.com> > --- > block/rbd.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 42 insertions(+), 7 deletions(-) > > diff --git a/block/rbd.c b/block/rbd.c > index a57b3e3..75ae1d6 100644 > --- a/block/rbd.c > +++ b/block/rbd.c > @@ -47,7 +47,7 @@ > */ > > /* rbd_aio_discard added in 0.1.2 */ > -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) > +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0) > #define LIBRBD_SUPPORTS_DISCARD > #else > #undef LIBRBD_SUPPORTS_DISCARD > @@ -73,7 +73,12 @@ typedef struct RBDAIOCB { > BlockAIOCB common; > int64_t ret; > QEMUIOVector *qiov; > +/* Note: > + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h. > + */ > +#ifndef LIBRBD_SUPPORTS_IOVEC > char *bounce; > +#endif > RBDAIOCmd cmd; > int error; > struct BDRVRBDState *s; > @@ -83,7 +88,9 @@ typedef struct RADOSCB { > RBDAIOCB *acb; > struct BDRVRBDState *s; > int64_t size; > +#ifndef LIBRBD_SUPPORTS_IOVEC > char *buf; > +#endif > int64_t ret; > } RADOSCB; > > @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) > } > } else { > if (r < 0) { > +#ifndef LIBRBD_SUPPORTS_IOVEC > memset(rcb->buf, 0, rcb->size); > +#else > + iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size); > +#endif > acb->ret = r; > acb->error = 1; > } else if (r < rcb->size) { > +#ifndef LIBRBD_SUPPORTS_IOVEC > memset(rcb->buf + r, 0, rcb->size - r); > +#else > + iov_memset(acb->qiov->iov, acb->qiov->niov, > + r, 0, acb->qiov->size - r); > +#endif > + > if (!acb->error) { > acb->ret = rcb->size; > } > @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) > > g_free(rcb); > > +#ifndef LIBRBD_SUPPORTS_IOVEC > if (acb->cmd == RBD_AIO_READ) { > qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); > } > qemu_vfree(acb->bounce); > +#endif > acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); > > qemu_aio_unref(acb); > @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > RBDAIOCB *acb; > RADOSCB *rcb = NULL; > rbd_completion_t c; > - char *buf; > int r; > +#ifndef LIBRBD_SUPPORTS_IOVEC > + char *buf = NULL; > +#endif > > BDRVRBDState *s = bs->opaque; > > @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > acb->cmd = cmd; > acb->qiov = qiov; > assert(!qiov || qiov->size == size); > +#ifndef LIBRBD_SUPPORTS_IOVEC > + > if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { > acb->bounce = NULL; > } else { > @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > goto failed; > } > } > - acb->ret = 0; > - acb->error = 0; > - acb->s = s; > - > if (cmd == RBD_AIO_WRITE) { > qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); > } > - > buf = acb->bounce; > +#endif > + acb->ret = 0; > + acb->error = 0; > + acb->s = s; > > rcb = g_new(RADOSCB, 1); > + > rcb->acb = acb; > +#ifndef LIBRBD_SUPPORTS_IOVEC > rcb->buf = buf; > +#endif > rcb->s = acb->s; > rcb->size = size; > r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); > @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > > switch (cmd) { > case RBD_AIO_WRITE: > +#ifndef LIBRBD_SUPPORTS_IOVEC > r = rbd_aio_write(s->image, off, size, buf, c); > +#else > + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); > +#endif > break; > case RBD_AIO_READ: > +#ifndef LIBRBD_SUPPORTS_IOVEC > r = rbd_aio_read(s->image, off, size, buf, c); > +#else > + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); > +#endif > break; > case RBD_AIO_DISCARD: > r = rbd_aio_discard_wrapper(s->image, off, size, c); > @@ -719,7 +752,9 @@ failed_completion: > rbd_aio_release(c); > failed: > g_free(rcb); > +#ifndef LIBRBD_SUPPORTS_IOVEC > qemu_vfree(acb->bounce); > +#endif > qemu_aio_unref(acb); > return NULL; > } > -- > 2.10.2 > > -- > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
No yet. I just test on one qemu-kvm vm. It works fine. The performance may need more time. Any one can test on this patch if you do fast.... 2017-02-16 20:07 GMT+08:00 Tiger Hu <hufh2004@gmail.com>: > Tianqing, > > Do we have any performance data for this patch? Thanks. > > Tiger >> 在 2017年2月16日,下午5:00,jazeltq@gmail.com 写道: >> >> From: tianqing <tianqing@unitedstack.com> >> >> Rbd can do readv and writev directly, so wo do not need to transform >> iov to buf or vice versa any more. >> >> Signed-off-by: tianqing <tianqing@unitedstack.com> >> --- >> block/rbd.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- >> 1 file changed, 42 insertions(+), 7 deletions(-) >> >> diff --git a/block/rbd.c b/block/rbd.c >> index a57b3e3..75ae1d6 100644 >> --- a/block/rbd.c >> +++ b/block/rbd.c >> @@ -47,7 +47,7 @@ >> */ >> >> /* rbd_aio_discard added in 0.1.2 */ >> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) >> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0) >> #define LIBRBD_SUPPORTS_DISCARD >> #else >> #undef LIBRBD_SUPPORTS_DISCARD >> @@ -73,7 +73,12 @@ typedef struct RBDAIOCB { >> BlockAIOCB common; >> int64_t ret; >> QEMUIOVector *qiov; >> +/* Note: >> + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h. >> + */ >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> char *bounce; >> +#endif >> RBDAIOCmd cmd; >> int error; >> struct BDRVRBDState *s; >> @@ -83,7 +88,9 @@ typedef struct RADOSCB { >> RBDAIOCB *acb; >> struct BDRVRBDState *s; >> int64_t size; >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> char *buf; >> +#endif >> int64_t ret; >> } RADOSCB; >> >> @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) >> } >> } else { >> if (r < 0) { >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> memset(rcb->buf, 0, rcb->size); >> +#else >> + iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size); >> +#endif >> acb->ret = r; >> acb->error = 1; >> } else if (r < rcb->size) { >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> memset(rcb->buf + r, 0, rcb->size - r); >> +#else >> + iov_memset(acb->qiov->iov, acb->qiov->niov, >> + r, 0, acb->qiov->size - r); >> +#endif >> + >> if (!acb->error) { >> acb->ret = rcb->size; >> } >> @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) >> >> g_free(rcb); >> >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> if (acb->cmd == RBD_AIO_READ) { >> qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); >> } >> qemu_vfree(acb->bounce); >> +#endif >> acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); >> >> qemu_aio_unref(acb); >> @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> RBDAIOCB *acb; >> RADOSCB *rcb = NULL; >> rbd_completion_t c; >> - char *buf; >> int r; >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> + char *buf = NULL; >> +#endif >> >> BDRVRBDState *s = bs->opaque; >> >> @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> acb->cmd = cmd; >> acb->qiov = qiov; >> assert(!qiov || qiov->size == size); >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> + >> if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { >> acb->bounce = NULL; >> } else { >> @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> goto failed; >> } >> } >> - acb->ret = 0; >> - acb->error = 0; >> - acb->s = s; >> - >> if (cmd == RBD_AIO_WRITE) { >> qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); >> } >> - >> buf = acb->bounce; >> +#endif >> + acb->ret = 0; >> + acb->error = 0; >> + acb->s = s; >> >> rcb = g_new(RADOSCB, 1); >> + >> rcb->acb = acb; >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> rcb->buf = buf; >> +#endif >> rcb->s = acb->s; >> rcb->size = size; >> r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); >> @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> >> switch (cmd) { >> case RBD_AIO_WRITE: >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> r = rbd_aio_write(s->image, off, size, buf, c); >> +#else >> + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); >> +#endif >> break; >> case RBD_AIO_READ: >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> r = rbd_aio_read(s->image, off, size, buf, c); >> +#else >> + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); >> +#endif >> break; >> case RBD_AIO_DISCARD: >> r = rbd_aio_discard_wrapper(s->image, off, size, c); >> @@ -719,7 +752,9 @@ failed_completion: >> rbd_aio_release(c); >> failed: >> g_free(rcb); >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> qemu_vfree(acb->bounce); >> +#endif >> qemu_aio_unref(acb); >> return NULL; >> } >> -- >> 2.10.2 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > -- 谦谦君子
>>No yet. I just test on one qemu-kvm vm. It works fine. >>The performance may need more time. >>Any one can test on this patch if you do fast.... Hi, I would like to bench it with small 4k read/write. On the ceph side,do we need this PR ? : https://github.com/ceph/ceph/pull/13447 ----- Mail original ----- De: "jazeltq" <jazeltq@gmail.com> À: "Tiger Hu" <hufh2004@gmail.com> Cc: "Josh Durgin" <jdurgin@redhat.com>, "Jeff Cody" <jcody@redhat.com>, "dillaman" <dillaman@redhat.com>, "Kevin Wolf" <kwolf@redhat.com>, mreitz@redhat.com, qemu-block@nongnu.org, "qemu-devel" <qemu-devel@nongnu.org>, "ceph-devel" <ceph-devel@vger.kernel.org>, "tianqing" <tianqing@unitedstack.com> Envoyé: Jeudi 16 Février 2017 15:03:52 Objet: Re: [RFC v5] RBD: Add support readv,writev for rbd No yet. I just test on one qemu-kvm vm. It works fine. The performance may need more time. Any one can test on this patch if you do fast.... 2017-02-16 20:07 GMT+08:00 Tiger Hu <hufh2004@gmail.com>: > Tianqing, > > Do we have any performance data for this patch? Thanks. > > Tiger >> 在 2017年2月16日,下午5:00,jazeltq@gmail.com 写道: >> >> From: tianqing <tianqing@unitedstack.com> >> >> Rbd can do readv and writev directly, so wo do not need to transform >> iov to buf or vice versa any more. >> >> Signed-off-by: tianqing <tianqing@unitedstack.com> >> --- >> block/rbd.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- >> 1 file changed, 42 insertions(+), 7 deletions(-) >> >> diff --git a/block/rbd.c b/block/rbd.c >> index a57b3e3..75ae1d6 100644 >> --- a/block/rbd.c >> +++ b/block/rbd.c >> @@ -47,7 +47,7 @@ >> */ >> >> /* rbd_aio_discard added in 0.1.2 */ >> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) >> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0) >> #define LIBRBD_SUPPORTS_DISCARD >> #else >> #undef LIBRBD_SUPPORTS_DISCARD >> @@ -73,7 +73,12 @@ typedef struct RBDAIOCB { >> BlockAIOCB common; >> int64_t ret; >> QEMUIOVector *qiov; >> +/* Note: >> + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h. >> + */ >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> char *bounce; >> +#endif >> RBDAIOCmd cmd; >> int error; >> struct BDRVRBDState *s; >> @@ -83,7 +88,9 @@ typedef struct RADOSCB { >> RBDAIOCB *acb; >> struct BDRVRBDState *s; >> int64_t size; >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> char *buf; >> +#endif >> int64_t ret; >> } RADOSCB; >> >> @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) >> } >> } else { >> if (r < 0) { >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> memset(rcb->buf, 0, rcb->size); >> +#else >> + iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size); >> +#endif >> acb->ret = r; >> acb->error = 1; >> } else if (r < rcb->size) { >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> memset(rcb->buf + r, 0, rcb->size - r); >> +#else >> + iov_memset(acb->qiov->iov, acb->qiov->niov, >> + r, 0, acb->qiov->size - r); >> +#endif >> + >> if (!acb->error) { >> acb->ret = rcb->size; >> } >> @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) >> >> g_free(rcb); >> >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> if (acb->cmd == RBD_AIO_READ) { >> qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); >> } >> qemu_vfree(acb->bounce); >> +#endif >> acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); >> >> qemu_aio_unref(acb); >> @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> RBDAIOCB *acb; >> RADOSCB *rcb = NULL; >> rbd_completion_t c; >> - char *buf; >> int r; >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> + char *buf = NULL; >> +#endif >> >> BDRVRBDState *s = bs->opaque; >> >> @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> acb->cmd = cmd; >> acb->qiov = qiov; >> assert(!qiov || qiov->size == size); >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> + >> if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { >> acb->bounce = NULL; >> } else { >> @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> goto failed; >> } >> } >> - acb->ret = 0; >> - acb->error = 0; >> - acb->s = s; >> - >> if (cmd == RBD_AIO_WRITE) { >> qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); >> } >> - >> buf = acb->bounce; >> +#endif >> + acb->ret = 0; >> + acb->error = 0; >> + acb->s = s; >> >> rcb = g_new(RADOSCB, 1); >> + >> rcb->acb = acb; >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> rcb->buf = buf; >> +#endif >> rcb->s = acb->s; >> rcb->size = size; >> r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); >> @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> >> switch (cmd) { >> case RBD_AIO_WRITE: >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> r = rbd_aio_write(s->image, off, size, buf, c); >> +#else >> + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); >> +#endif >> break; >> case RBD_AIO_READ: >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> r = rbd_aio_read(s->image, off, size, buf, c); >> +#else >> + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); >> +#endif >> break; >> case RBD_AIO_DISCARD: >> r = rbd_aio_discard_wrapper(s->image, off, size, c); >> @@ -719,7 +752,9 @@ failed_completion: >> rbd_aio_release(c); >> failed: >> g_free(rcb); >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> qemu_vfree(acb->bounce); >> +#endif >> qemu_aio_unref(acb); >> return NULL; >> } >> -- >> 2.10.2 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > -- 谦谦君子 -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Feb 16, 2017 at 10:13 AM, Alexandre DERUMIER <aderumier@odiso.com> wrote: > Hi, I would like to bench it with small 4k read/write. > > On the ceph side,do we need this PR ? : > https://github.com/ceph/ceph/pull/13447 Yes, that is the correct PR for the client-side librbd changes. You should be able to test it against Hammer/Jewel-release clusters. -- Jason
On Thu, Feb 16, 2017 at 4:00 AM, <jazeltq@gmail.com> wrote: > From: tianqing <tianqing@unitedstack.com> > > Rbd can do readv and writev directly, so wo do not need to transform > iov to buf or vice versa any more. > > Signed-off-by: tianqing <tianqing@unitedstack.com> > --- > block/rbd.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 42 insertions(+), 7 deletions(-) > > diff --git a/block/rbd.c b/block/rbd.c > index a57b3e3..75ae1d6 100644 > --- a/block/rbd.c > +++ b/block/rbd.c > @@ -47,7 +47,7 @@ > */ > > /* rbd_aio_discard added in 0.1.2 */ > -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) > +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0) > #define LIBRBD_SUPPORTS_DISCARD > #else > #undef LIBRBD_SUPPORTS_DISCARD Do not change this -- discard support is available in very old versions of librbd not just the future Luminous release. > @@ -73,7 +73,12 @@ typedef struct RBDAIOCB { > BlockAIOCB common; > int64_t ret; > QEMUIOVector *qiov; > +/* Note: > + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h. > + */ > +#ifndef LIBRBD_SUPPORTS_IOVEC > char *bounce; > +#endif > RBDAIOCmd cmd; > int error; > struct BDRVRBDState *s; > @@ -83,7 +88,9 @@ typedef struct RADOSCB { > RBDAIOCB *acb; > struct BDRVRBDState *s; > int64_t size; > +#ifndef LIBRBD_SUPPORTS_IOVEC > char *buf; > +#endif > int64_t ret; > } RADOSCB; > > @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) > } > } else { > if (r < 0) { > +#ifndef LIBRBD_SUPPORTS_IOVEC > memset(rcb->buf, 0, rcb->size); > +#else > + iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size); > +#endif > acb->ret = r; > acb->error = 1; > } else if (r < rcb->size) { > +#ifndef LIBRBD_SUPPORTS_IOVEC > memset(rcb->buf + r, 0, rcb->size - r); > +#else > + iov_memset(acb->qiov->iov, acb->qiov->niov, > + r, 0, acb->qiov->size - r); > +#endif > + > if (!acb->error) { > acb->ret = rcb->size; > } > @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) > > g_free(rcb); > > +#ifndef LIBRBD_SUPPORTS_IOVEC > if (acb->cmd == RBD_AIO_READ) { > qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); > } > qemu_vfree(acb->bounce); > +#endif > acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); > > qemu_aio_unref(acb); > @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > RBDAIOCB *acb; > RADOSCB *rcb = NULL; > rbd_completion_t c; > - char *buf; > int r; > +#ifndef LIBRBD_SUPPORTS_IOVEC > + char *buf = NULL; > +#endif > > BDRVRBDState *s = bs->opaque; > > @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > acb->cmd = cmd; > acb->qiov = qiov; > assert(!qiov || qiov->size == size); > +#ifndef LIBRBD_SUPPORTS_IOVEC > + > if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { > acb->bounce = NULL; > } else { > @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > goto failed; > } > } > - acb->ret = 0; > - acb->error = 0; > - acb->s = s; > - > if (cmd == RBD_AIO_WRITE) { > qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); > } > - > buf = acb->bounce; > +#endif > + acb->ret = 0; > + acb->error = 0; > + acb->s = s; > > rcb = g_new(RADOSCB, 1); > + > rcb->acb = acb; > +#ifndef LIBRBD_SUPPORTS_IOVEC > rcb->buf = buf; > +#endif > rcb->s = acb->s; > rcb->size = size; > r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); > @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > > switch (cmd) { > case RBD_AIO_WRITE: > +#ifndef LIBRBD_SUPPORTS_IOVEC > r = rbd_aio_write(s->image, off, size, buf, c); > +#else > + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); > +#endif > break; > case RBD_AIO_READ: > +#ifndef LIBRBD_SUPPORTS_IOVEC > r = rbd_aio_read(s->image, off, size, buf, c); > +#else > + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); > +#endif > break; > case RBD_AIO_DISCARD: > r = rbd_aio_discard_wrapper(s->image, off, size, c); > @@ -719,7 +752,9 @@ failed_completion: > rbd_aio_release(c); > failed: > g_free(rcb); > +#ifndef LIBRBD_SUPPORTS_IOVEC > qemu_vfree(acb->bounce); > +#endif > qemu_aio_unref(acb); > return NULL; > } > -- > 2.10.2 > -- Jason
2017-02-16 22:14 GMT+08:00 Jason Dillaman <jdillama@redhat.com>: > On Thu, Feb 16, 2017 at 4:00 AM, <jazeltq@gmail.com> wrote: >> From: tianqing <tianqing@unitedstack.com> >> >> Rbd can do readv and writev directly, so wo do not need to transform >> iov to buf or vice versa any more. >> >> Signed-off-by: tianqing <tianqing@unitedstack.com> >> --- >> block/rbd.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- >> 1 file changed, 42 insertions(+), 7 deletions(-) >> >> diff --git a/block/rbd.c b/block/rbd.c >> index a57b3e3..75ae1d6 100644 >> --- a/block/rbd.c >> +++ b/block/rbd.c >> @@ -47,7 +47,7 @@ >> */ >> >> /* rbd_aio_discard added in 0.1.2 */ >> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) >> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0) >> #define LIBRBD_SUPPORTS_DISCARD >> #else >> #undef LIBRBD_SUPPORTS_DISCARD > > Do not change this -- discard support is available in very old > versions of librbd not just the future Luminous release. Ok. Thanks a lot. > >> @@ -73,7 +73,12 @@ typedef struct RBDAIOCB { >> BlockAIOCB common; >> int64_t ret; >> QEMUIOVector *qiov; >> +/* Note: >> + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h. >> + */ >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> char *bounce; >> +#endif >> RBDAIOCmd cmd; >> int error; >> struct BDRVRBDState *s; >> @@ -83,7 +88,9 @@ typedef struct RADOSCB { >> RBDAIOCB *acb; >> struct BDRVRBDState *s; >> int64_t size; >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> char *buf; >> +#endif >> int64_t ret; >> } RADOSCB; >> >> @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) >> } >> } else { >> if (r < 0) { >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> memset(rcb->buf, 0, rcb->size); >> +#else >> + iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size); >> +#endif >> acb->ret = r; >> acb->error = 1; >> } else if (r < rcb->size) { >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> memset(rcb->buf + r, 0, rcb->size - r); >> +#else >> + iov_memset(acb->qiov->iov, acb->qiov->niov, >> + r, 0, acb->qiov->size - r); >> +#endif >> + >> if (!acb->error) { >> acb->ret = rcb->size; >> } >> @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) >> >> g_free(rcb); >> >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> if (acb->cmd == RBD_AIO_READ) { >> qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); >> } >> qemu_vfree(acb->bounce); >> +#endif >> acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); >> >> qemu_aio_unref(acb); >> @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> RBDAIOCB *acb; >> RADOSCB *rcb = NULL; >> rbd_completion_t c; >> - char *buf; >> int r; >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> + char *buf = NULL; >> +#endif >> >> BDRVRBDState *s = bs->opaque; >> >> @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> acb->cmd = cmd; >> acb->qiov = qiov; >> assert(!qiov || qiov->size == size); >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> + >> if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { >> acb->bounce = NULL; >> } else { >> @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> goto failed; >> } >> } >> - acb->ret = 0; >> - acb->error = 0; >> - acb->s = s; >> - >> if (cmd == RBD_AIO_WRITE) { >> qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); >> } >> - >> buf = acb->bounce; >> +#endif >> + acb->ret = 0; >> + acb->error = 0; >> + acb->s = s; >> >> rcb = g_new(RADOSCB, 1); >> + >> rcb->acb = acb; >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> rcb->buf = buf; >> +#endif >> rcb->s = acb->s; >> rcb->size = size; >> r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); >> @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> >> switch (cmd) { >> case RBD_AIO_WRITE: >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> r = rbd_aio_write(s->image, off, size, buf, c); >> +#else >> + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); >> +#endif >> break; >> case RBD_AIO_READ: >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> r = rbd_aio_read(s->image, off, size, buf, c); >> +#else >> + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); >> +#endif >> break; >> case RBD_AIO_DISCARD: >> r = rbd_aio_discard_wrapper(s->image, off, size, c); >> @@ -719,7 +752,9 @@ failed_completion: >> rbd_aio_release(c); >> failed: >> g_free(rcb); >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> qemu_vfree(acb->bounce); >> +#endif >> qemu_aio_unref(acb); >> return NULL; >> } >> -- >> 2.10.2 >> > > > > -- > Jason -- 谦谦君子
On 02/16/2017 03:00 AM, jazeltq@gmail.com wrote: > From: tianqing <tianqing@unitedstack.com> > > Rbd can do readv and writev directly, so wo do not need to transform > iov to buf or vice versa any more. In general, we prefer new revisions of a patch series to be sent as a new top-level thread, rather than buried in-reply-to the earlier revision, as nesting can mess up some of the automated tooling that tests whether a patch will introduce problems. > > Signed-off-by: tianqing <tianqing@unitedstack.com> > --- > block/rbd.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 42 insertions(+), 7 deletions(-) > > diff --git a/block/rbd.c b/block/rbd.c > index a57b3e3..75ae1d6 100644 > --- a/block/rbd.c > +++ b/block/rbd.c > @@ -47,7 +47,7 @@ > */ > > /* rbd_aio_discard added in 0.1.2 */ > -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) > +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0) Does the comment need updating, too? -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org
On Thu, Feb 16, 2017 at 05:00:02PM +0800, jazeltq@gmail.com wrote: > From: tianqing <tianqing@unitedstack.com> > > Rbd can do readv and writev directly, so wo do not need to transform > iov to buf or vice versa any more. > > Signed-off-by: tianqing <tianqing@unitedstack.com> > --- > block/rbd.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 42 insertions(+), 7 deletions(-) > > diff --git a/block/rbd.c b/block/rbd.c > index a57b3e3..75ae1d6 100644 > --- a/block/rbd.c > +++ b/block/rbd.c > @@ -47,7 +47,7 @@ > */ > > /* rbd_aio_discard added in 0.1.2 */ > -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) > +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0) > #define LIBRBD_SUPPORTS_DISCARD > #else > #undef LIBRBD_SUPPORTS_DISCARD > @@ -73,7 +73,12 @@ typedef struct RBDAIOCB { > BlockAIOCB common; > int64_t ret; > QEMUIOVector *qiov; > +/* Note: > + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h. > + */ I understand that since this feature relies on new symbols in the library, the ifdef can't be entirely avoided. However, there are a few changes that can make the code much more readable, and greatly reduce the use of ifdefs through the code. For instance, you could have helper constants: #ifdef LIBRBD_SUPPORTS_IOVEC #define LIBRBD_USE_IOVEC 1 #else #define LIBRBD_USE_IOVEC 0 #endif Now that can be used for a more natural code, with minimal ifdefs. The compiler optimizations should still remove the code, as well. A helper function for memset will also keep things simpler: static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs) { if (LIBRBD_USE_IOVEC) { RBDAIOCB *acb = rcb->acb; iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0, acb->qiov->size - offs); } else { memset(rcb->buf + offs, 0, rcb->size - offs); } } > +#ifndef LIBRBD_SUPPORTS_IOVEC > char *bounce; > +#endif This conditional can go away (see [1]). > RBDAIOCmd cmd; > int error; > struct BDRVRBDState *s; > @@ -83,7 +88,9 @@ typedef struct RADOSCB { > RBDAIOCB *acb; > struct BDRVRBDState *s; > int64_t size; > +#ifndef LIBRBD_SUPPORTS_IOVEC > char *buf; > +#endif As can this. > int64_t ret; > } RADOSCB; > > @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) > } > } else { > if (r < 0) { > +#ifndef LIBRBD_SUPPORTS_IOVEC > memset(rcb->buf, 0, rcb->size); > +#else > + iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size); > +#endif This just becomes: qemu_rbd_memset(rcb, 0); > acb->ret = r; > acb->error = 1; > } else if (r < rcb->size) { > +#ifndef LIBRBD_SUPPORTS_IOVEC > memset(rcb->buf + r, 0, rcb->size - r); > +#else > + iov_memset(acb->qiov->iov, acb->qiov->niov, > + r, 0, acb->qiov->size - r); > +#endif > + Same here: qemu_rbd_memset(rcb, r); > if (!acb->error) { > acb->ret = rcb->size; > } > @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) > > g_free(rcb); > > +#ifndef LIBRBD_SUPPORTS_IOVEC > if (acb->cmd == RBD_AIO_READ) { > qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); > } > qemu_vfree(acb->bounce); > +#endif [1] Instead of a conditional, you can now use the constant with an if statement: if (!LIBRBD_USE_IOVEC) { if (acb->cmd == RBD_AIO_READ) { qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); } qemu_vfree(acb->bounce); } > acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); > > qemu_aio_unref(acb); > @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > RBDAIOCB *acb; > RADOSCB *rcb = NULL; > rbd_completion_t c; > - char *buf; > int r; > +#ifndef LIBRBD_SUPPORTS_IOVEC > + char *buf = NULL; > +#endif This variable can completely go away with some code re-arrangement (below) > > BDRVRBDState *s = bs->opaque; > > @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > acb->cmd = cmd; > acb->qiov = qiov; > assert(!qiov || qiov->size == size); [2] Allocate rcb here > +#ifndef LIBRBD_SUPPORTS_IOVEC > + You can use if(!LIBRBD_USE_IOVEC) here then. > if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { > acb->bounce = NULL; > } else { > @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > goto failed; > } > } > - acb->ret = 0; > - acb->error = 0; > - acb->s = s; > - > if (cmd == RBD_AIO_WRITE) { > qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); > } > - > buf = acb->bounce; This can just become rcb->buf = acb->bounce > +#endif > + acb->ret = 0; > + acb->error = 0; > + acb->s = s; > > rcb = g_new(RADOSCB, 1); Move this to [2] > + > rcb->acb = acb; > +#ifndef LIBRBD_SUPPORTS_IOVEC > rcb->buf = buf; > +#endif These 3 lines can then go away > rcb->s = acb->s; > rcb->size = size; > r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); > @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > > switch (cmd) { > case RBD_AIO_WRITE: > +#ifndef LIBRBD_SUPPORTS_IOVEC > r = rbd_aio_write(s->image, off, size, buf, c); If you use rcb->buf here, that allows buf to go away completely. > +#else > + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); > +#endif > break; > case RBD_AIO_READ: > +#ifndef LIBRBD_SUPPORTS_IOVEC > r = rbd_aio_read(s->image, off, size, buf, c); Same > +#else > + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); > +#endif > break; > case RBD_AIO_DISCARD: > r = rbd_aio_discard_wrapper(s->image, off, size, c); > @@ -719,7 +752,9 @@ failed_completion: > rbd_aio_release(c); > failed: > g_free(rcb); > +#ifndef LIBRBD_SUPPORTS_IOVEC And use the if(!LIBRBD_USE_IOVEC) here too. > qemu_vfree(acb->bounce); > +#endif > qemu_aio_unref(acb); > return NULL; > } > -- > 2.10.2 > -Jeff
© 2016 - 2024 Red Hat, Inc.