block/iscsi.c | 449 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 358 insertions(+), 91 deletions(-)
When an iSCSI session drops or an I/O stalls past a hard timeout,
requests could complete after teardown and coroutines might be woken
in an invalid state. This patch clarifies task ownership and hardens
completion paths.
Changes:
- Add a per-command deadline (I/O hard timeout). On expiry, cancel
the libiscsi task, detach the coroutine (co = NULL), and let the
callback free the task safely.
- Track inflight tasks and fail them promptly on session disconnect
when fail-fast is enabled. Throttle reconnect attempts and drive
them via the periodic timer and NOP keepalives.
- Always refresh the fd event mask after state changes. Tidy event,
read, and write handlers, and remove unused labels/duplicates.
- Arm deadlines for heap-allocated tasks (read/write/flush) and for
the stack-based task in the hunk at -733. The other stack-based
helper paths continue to wait synchronously without deadlines.
User-visible effect: under error conditions we now return -ETIMEDOUT
or -ENOTCONN instead of hanging or crashing. I/O that completes within
the timeout behaves as before. A 5s hard timeout with fail-fast is
always enabled (set unconditionally in iscsi_open()) to avoid
indefinite stalls.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3067
Signed-off-by: CJ Chen <cjchen@igel.co.jp>
---
block/iscsi.c | 449 ++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 358 insertions(+), 91 deletions(-)
diff --git a/block/iscsi.c b/block/iscsi.c
index 15b96ee880..094b51c47c 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -49,6 +49,7 @@
#include "crypto/secret.h"
#include "scsi/utils.h"
#include "trace.h"
+#include "qemu/timer.h"
/* Conflict between scsi/utils.h and libiscsi! :( */
#define SCSI_XFER_NONE ISCSI_XFER_NONE
@@ -74,6 +75,8 @@ typedef struct IscsiLun {
QEMUTimer *nop_timer;
QEMUTimer *event_timer;
QemuMutex mutex;
+ int64_t next_reconnect_ms; /* throttle repeated reconnects */
+ bool last_logged_in; /* for state-change logging */
struct scsi_inquiry_logical_block_provisioning lbp;
struct scsi_inquiry_block_limits bl;
struct scsi_inquiry_device_designator *dd;
@@ -103,6 +106,9 @@ typedef struct IscsiLun {
bool dpofua;
bool has_write_same;
bool request_timed_out;
+ uint32_t io_hard_timeout_ms;
+ bool fail_fast;
+ QTAILQ_HEAD(, IscsiTask) inflight;
} IscsiLun;
typedef struct IscsiTask {
@@ -116,6 +122,12 @@ typedef struct IscsiTask {
QEMUTimer retry_timer;
int err_code;
char *err_str;
+ QEMUTimer deadline_timer;
+ bool deadline_armed;
+ bool hard_timed_out;
+ int64_t first_submit_ms;
+ QTAILQ_ENTRY(IscsiTask) entry;
+ bool on_list;
} IscsiTask;
typedef struct IscsiAIOCB {
@@ -185,7 +197,9 @@ static void iscsi_co_generic_bh_cb(void *opaque)
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
- aio_co_wake(iTask->co);
+ if (iTask->co) {
+ aio_co_wake(iTask->co);
+ }
}
static void iscsi_retry_timer_expired(void *opaque)
@@ -232,75 +246,148 @@ static int iscsi_translate_sense(struct scsi_sense *sense)
sense->ascq & 0xFF);
}
+static void iscsi_fail_inflight(IscsiLun *s, int err)
+{
+ IscsiTask *it, *next;
+ int n = 0;
+
+ QTAILQ_FOREACH_SAFE(it, &s->inflight, entry, next) {
+ if (it->deadline_armed) {
+ timer_del(&it->deadline_timer);
+ it->deadline_armed = false;
+ }
+ it->err_code = err ? err : -EIO;
+ it->hard_timed_out = true;
+ it->status = SCSI_STATUS_TIMEOUT;
+
+ if (it->task) {
+ iscsi_scsi_cancel_task(s->iscsi, it->task);
+ }
+
+ if (it->co) {
+ replay_bh_schedule_oneshot_event(s->aio_context,
+ iscsi_co_generic_bh_cb, it);
+ } else {
+ it->complete = 1;
+ }
+ QTAILQ_REMOVE(&s->inflight, it, entry);
+ it->on_list = false;
+ n++;
+ }
+}
+
/* Called (via iscsi_service) with QemuMutex held. */
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
- void *command_data, void *opaque)
+ void *command_data, void *opaque)
{
struct IscsiTask *iTask = opaque;
struct scsi_task *task = command_data;
+ if (iTask->deadline_armed) {
+ timer_del(&iTask->deadline_timer);
+ iTask->deadline_armed = false;
+ }
+
iTask->status = status;
iTask->do_retry = 0;
iTask->err_code = 0;
iTask->task = task;
if (status != SCSI_STATUS_GOOD) {
- iTask->err_code = -EIO;
- if (iTask->retries++ < ISCSI_CMD_RETRIES) {
- if (status == SCSI_STATUS_BUSY ||
- status == SCSI_STATUS_TIMEOUT ||
- status == SCSI_STATUS_TASK_SET_FULL) {
- unsigned retry_time =
- exp_random(iscsi_retry_times[iTask->retries - 1]);
- if (status == SCSI_STATUS_TIMEOUT) {
- /* make sure the request is rescheduled AFTER the
- * reconnect is initiated */
- retry_time = EVENT_INTERVAL * 2;
- iTask->iscsilun->request_timed_out = true;
- }
- error_report("iSCSI Busy/TaskSetFull/TimeOut"
- " (retry #%u in %u ms): %s",
- iTask->retries, retry_time,
- iscsi_get_error(iscsi));
- aio_timer_init(iTask->iscsilun->aio_context,
- &iTask->retry_timer, QEMU_CLOCK_REALTIME,
- SCALE_MS, iscsi_retry_timer_expired, iTask);
- timer_mod(&iTask->retry_timer,
- qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
- iTask->do_retry = 1;
- return;
- } else if (status == SCSI_STATUS_CHECK_CONDITION) {
- int error = iscsi_translate_sense(&task->sense);
- if (error == EAGAIN) {
- error_report("iSCSI CheckCondition: %s",
+ if (iTask->hard_timed_out) {
+ iTask->err_code = -ETIMEDOUT;
+ iTask->err_str = g_strdup("iSCSI hard timeout");
+ iTask->do_retry = 0;
+ } else {
+ iTask->err_code = -EIO;
+ if (iTask->retries++ < ISCSI_CMD_RETRIES) {
+ if (status == SCSI_STATUS_BUSY ||
+ status == SCSI_STATUS_TIMEOUT ||
+ status == SCSI_STATUS_TASK_SET_FULL) {
+ unsigned retry_time =
+ exp_random(iscsi_retry_times[iTask->retries - 1]);
+ if (status == SCSI_STATUS_TIMEOUT) {
+ /*
+ * make sure the request is rescheduled AFTER the
+ * reconnect is initiated
+ */
+ retry_time = EVENT_INTERVAL * 2;
+ iTask->iscsilun->request_timed_out = true;
+ }
+ error_report("iSCSI Busy/TaskSetFull/TimeOut"
+ " (retry #%u in %u ms): %s",
+ iTask->retries, retry_time,
iscsi_get_error(iscsi));
+ aio_timer_init(iTask->iscsilun->aio_context,
+ &iTask->retry_timer, QEMU_CLOCK_REALTIME,
+ SCALE_MS, iscsi_retry_timer_expired, iTask);
+ timer_mod(&iTask->retry_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
iTask->do_retry = 1;
+ return;
+ } else if (status == SCSI_STATUS_CHECK_CONDITION) {
+ int error = iscsi_translate_sense(&task->sense);
+ if (error == EAGAIN) {
+ error_report("iSCSI CheckCondition: %s",
+ iscsi_get_error(iscsi));
+ iTask->do_retry = 1;
+ } else {
+ iTask->err_code = -error;
+ iTask->err_str = g_strdup(iscsi_get_error(iscsi));
+ }
} else {
- iTask->err_code = -error;
+ if (!iTask->err_str) {
+ iTask->err_str = g_strdup(iscsi_get_error(iscsi));
+ }
+ }
+ } else {
+ if (!iTask->err_str) {
iTask->err_str = g_strdup(iscsi_get_error(iscsi));
}
}
}
}
-
if (iTask->co) {
replay_bh_schedule_oneshot_event(iTask->iscsilun->aio_context,
iscsi_co_generic_bh_cb, iTask);
} else {
iTask->complete = 1;
+ if (iTask->task) {
+ scsi_free_scsi_task(iTask->task);
+ iTask->task = NULL;
+ }
+ g_free(iTask->err_str);
+ g_free(iTask);
}
+
}
static void coroutine_fn
iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
{
*iTask = (struct IscsiTask) {
- .co = qemu_coroutine_self(),
- .iscsilun = iscsilun,
+ .co = qemu_coroutine_self(),
+ .iscsilun = iscsilun,
+ .deadline_armed = false,
+ .hard_timed_out = false,
+ .first_submit_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME),
+ .on_list = false,
};
}
+static IscsiTask * coroutine_fn iscsi_task_new(IscsiLun *iscsilun)
+{
+ IscsiTask *iTask = g_new0(IscsiTask, 1);
+ iTask->co = qemu_coroutine_self();
+ iTask->iscsilun = iscsilun;
+ iTask->deadline_armed = false;
+ iTask->hard_timed_out = false;
+ iTask->first_submit_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ iTask->on_list = false;
+ return iTask;
+}
+
#ifdef __linux__
/* Called (via iscsi_service) with QemuMutex held. */
@@ -371,17 +458,85 @@ iscsi_set_events(IscsiLun *iscsilun)
}
}
+/* Try to (re)connect, but throttle to avoid storms. */
+static void iscsi_maybe_reconnect(IscsiLun *iscsilun)
+{
+ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ if (now < iscsilun->next_reconnect_ms) {
+ return;
+ }
+ iscsi_reconnect(iscsilun->iscsi);
+ iscsilun->next_reconnect_ms = now + 2000; /* 2s throttle */
+ /* After changing connection state, refresh event mask immediately. */
+ iscsi_set_events(iscsilun);
+}
+
+static void iscsi_deadline_timer_expired(void *opaque)
+{
+ struct IscsiTask *iTask = opaque;
+ IscsiLun *iscsilun = iTask->iscsilun;
+
+ if (!iTask->deadline_armed) {
+ return;
+ }
+ iTask->deadline_armed = false;
+ iTask->hard_timed_out = true;
+ iTask->status = SCSI_STATUS_TIMEOUT;
+ iTask->err_code = -ETIMEDOUT;
+
+ if (iscsilun) {
+ qemu_mutex_lock(&iscsilun->mutex);
+ if (iTask->task) {
+ iscsi_scsi_cancel_task(iscsilun->iscsi, iTask->task);
+ if (iTask->co) {
+ replay_bh_schedule_oneshot_event(iscsilun->aio_context,
+ iscsi_co_generic_bh_cb, iTask);
+ }
+ iscsi_set_events(iscsilun);
+ }
+ qemu_mutex_unlock(&iscsilun->mutex);
+ }
+}
+
+static inline void iscsi_arm_deadline(struct IscsiTask *iTask)
+{
+ IscsiLun *iscsilun = iTask->iscsilun;
+
+ if (!iscsilun->io_hard_timeout_ms || iTask->deadline_armed) {
+ return;
+ }
+ aio_timer_init(iscsilun->aio_context, &iTask->deadline_timer,
+ QEMU_CLOCK_REALTIME, SCALE_MS,
+ iscsi_deadline_timer_expired, iTask);
+ timer_mod(&iTask->deadline_timer,
+ iTask->first_submit_ms + iscsilun->io_hard_timeout_ms);
+ iTask->deadline_armed = true;
+}
+
static void iscsi_timed_check_events(void *opaque)
{
IscsiLun *iscsilun = opaque;
WITH_QEMU_LOCK_GUARD(&iscsilun->mutex) {
+ bool logged_in_before = iscsilun->last_logged_in;
+ bool logged_in_now;
/* check for timed out requests */
iscsi_service(iscsilun->iscsi, 0);
+ logged_in_now = iscsi_is_logged_in(iscsilun->iscsi);
+ if (logged_in_before != logged_in_now) {
+ iscsilun->last_logged_in = logged_in_now;
+ if (logged_in_before && !logged_in_now && iscsilun->fail_fast) {
+ iscsi_fail_inflight(iscsilun, -ENOTCONN);
+ }
+ }
if (iscsilun->request_timed_out) {
iscsilun->request_timed_out = false;
- iscsi_reconnect(iscsilun->iscsi);
+ iscsi_maybe_reconnect(iscsilun);
+ }
+
+ if (!logged_in_now) {
+ iscsi_maybe_reconnect(iscsilun);
}
/*
@@ -605,7 +760,7 @@ iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
QEMUIOVector *iov, int flags)
{
IscsiLun *iscsilun = bs->opaque;
- struct IscsiTask iTask;
+ IscsiTask *iTask;
uint64_t lba;
uint32_t num_sectors;
bool fua = flags & BDRV_REQ_FUA;
@@ -624,21 +779,27 @@ iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
lba = sector_qemu2lun(sector_num, iscsilun);
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
- iscsi_co_init_iscsitask(iscsilun, &iTask);
+ iTask = iscsi_task_new(iscsilun);
qemu_mutex_lock(&iscsilun->mutex);
+ if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) {
+ qemu_mutex_unlock(&iscsilun->mutex);
+ g_free(iTask->err_str);
+ g_free(iTask);
+ return -ENOTCONN;
+ }
retry:
if (iscsilun->use_16_for_rw) {
#if LIBISCSI_API_VERSION >= (20160603)
- iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
+ iTask->task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
NULL, num_sectors * iscsilun->block_size,
iscsilun->block_size, 0, 0, fua, 0, 0,
- iscsi_co_generic_cb, &iTask,
+ iscsi_co_generic_cb, iTask,
(struct scsi_iovec *)iov->iov, iov->niov);
} else {
- iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
+ iTask->task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
NULL, num_sectors * iscsilun->block_size,
iscsilun->block_size, 0, 0, fua, 0, 0,
- iscsi_co_generic_cb, &iTask,
+ iscsi_co_generic_cb, iTask,
(struct scsi_iovec *)iov->iov, iov->niov);
}
#else
@@ -653,41 +814,62 @@ retry:
iscsi_co_generic_cb, &iTask);
}
#endif
- if (iTask.task == NULL) {
+ if (iTask->task == NULL) {
qemu_mutex_unlock(&iscsilun->mutex);
+ g_free(iTask);
return -ENOMEM;
}
#if LIBISCSI_API_VERSION < (20160603)
scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
iov->niov);
#endif
- iscsi_co_wait_for_task(&iTask, iscsilun);
+ iscsi_set_events(iscsilun);
+ if (!iTask->on_list) {
+ QTAILQ_INSERT_TAIL(&iscsilun->inflight, iTask, entry);
+ iTask->on_list = true;
+ }
+ iscsi_arm_deadline(iTask);
+ iscsi_co_wait_for_task(iTask, iscsilun);
- if (iTask.task != NULL) {
- scsi_free_scsi_task(iTask.task);
- iTask.task = NULL;
+ if (!iTask->hard_timed_out && iTask->task != NULL) {
+ scsi_free_scsi_task(iTask->task);
+ iTask->task = NULL;
}
- if (iTask.do_retry) {
- iTask.complete = 0;
+ if (iTask->do_retry) {
+ iTask->complete = 0;
goto retry;
}
- if (iTask.status != SCSI_STATUS_GOOD) {
+ if (iTask->hard_timed_out) {
+ r = iTask->err_code ? iTask->err_code : -ETIMEDOUT;
+ iTask->co = NULL;
+ if (iTask->on_list) {
+ QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
+ iTask->on_list = false;
+ }
+ qemu_mutex_unlock(&iscsilun->mutex);
+ return r;
+ }
+
+ if (iTask->status != SCSI_STATUS_GOOD) {
iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE);
error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba,
- iTask.err_str);
- r = iTask.err_code;
- goto out_unlock;
+ iTask->err_str);
+ r = iTask->err_code;
+ } else {
+ iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE);
}
- iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
- nb_sectors * BDRV_SECTOR_SIZE);
-
-out_unlock:
+ if (iTask->on_list) {
+ QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
+ iTask->on_list = false;
+ }
qemu_mutex_unlock(&iscsilun->mutex);
- g_free(iTask.err_str);
+ g_free(iTask->err_str);
+ g_free(iTask);
return r;
}
@@ -733,6 +915,8 @@ retry:
ret = -ENOMEM;
goto out_unlock;
}
+ iscsi_arm_deadline(&iTask);
+ iscsi_set_events(iscsilun);
iscsi_co_wait_for_task(&iTask, iscsilun);
if (iTask.do_retry) {
@@ -801,7 +985,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
QEMUIOVector *iov)
{
IscsiLun *iscsilun = bs->opaque;
- struct IscsiTask iTask;
+ IscsiTask *iTask;
uint64_t lba;
uint32_t num_sectors;
int r = 0;
@@ -856,22 +1040,28 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
lba = sector_qemu2lun(sector_num, iscsilun);
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
- iscsi_co_init_iscsitask(iscsilun, &iTask);
+ iTask = iscsi_task_new(iscsilun);
qemu_mutex_lock(&iscsilun->mutex);
+ if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) {
+ qemu_mutex_unlock(&iscsilun->mutex);
+ g_free(iTask->err_str);
+ g_free(iTask);
+ return -ENOTCONN;
+ }
retry:
if (iscsilun->use_16_for_rw) {
#if LIBISCSI_API_VERSION >= (20160603)
- iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
+ iTask->task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
num_sectors * iscsilun->block_size,
iscsilun->block_size, 0, 0, 0, 0, 0,
- iscsi_co_generic_cb, &iTask,
+ iscsi_co_generic_cb, iTask,
(struct scsi_iovec *)iov->iov, iov->niov);
} else {
- iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
+ iTask->task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
num_sectors * iscsilun->block_size,
iscsilun->block_size,
0, 0, 0, 0, 0,
- iscsi_co_generic_cb, &iTask,
+ iscsi_co_generic_cb, iTask,
(struct scsi_iovec *)iov->iov, iov->niov);
}
#else
@@ -887,70 +1077,119 @@ retry:
iscsi_co_generic_cb, &iTask);
}
#endif
- if (iTask.task == NULL) {
+ if (iTask->task == NULL) {
qemu_mutex_unlock(&iscsilun->mutex);
+ g_free(iTask);
return -ENOMEM;
}
#if LIBISCSI_API_VERSION < (20160603)
scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
#endif
-
- iscsi_co_wait_for_task(&iTask, iscsilun);
- if (iTask.task != NULL) {
- scsi_free_scsi_task(iTask.task);
- iTask.task = NULL;
+ if (!iTask->on_list) {
+ QTAILQ_INSERT_TAIL(&iscsilun->inflight, iTask, entry);
+ iTask->on_list = true;
+ }
+ iscsi_arm_deadline(iTask);
+ iscsi_co_wait_for_task(iTask, iscsilun);
+ if (!iTask->hard_timed_out && iTask->task != NULL) {
+ scsi_free_scsi_task(iTask->task);
+ iTask->task = NULL;
}
- if (iTask.do_retry) {
- iTask.complete = 0;
+ if (iTask->do_retry) {
+ iTask->complete = 0;
goto retry;
}
- if (iTask.status != SCSI_STATUS_GOOD) {
+ if (iTask->hard_timed_out) {
+ r = iTask->err_code ? iTask->err_code : -ETIMEDOUT;
+ iTask->co = NULL;
+ if (iTask->on_list) {
+ QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
+ iTask->on_list = false;
+ }
+ qemu_mutex_unlock(&iscsilun->mutex);
+ return r;
+ }
+
+ if (iTask->status != SCSI_STATUS_GOOD) {
error_report("iSCSI READ10/16 failed at lba %" PRIu64 ": %s",
- lba, iTask.err_str);
- r = iTask.err_code;
+ lba, iTask->err_str);
+ r = iTask->err_code;
}
+ if (iTask->on_list) {
+ QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
+ iTask->on_list = false;
+ }
qemu_mutex_unlock(&iscsilun->mutex);
- g_free(iTask.err_str);
+ g_free(iTask->err_str);
+ g_free(iTask);
return r;
}
static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
{
IscsiLun *iscsilun = bs->opaque;
- struct IscsiTask iTask;
+ IscsiTask *iTask;
int r = 0;
- iscsi_co_init_iscsitask(iscsilun, &iTask);
+ iTask = iscsi_task_new(iscsilun);
qemu_mutex_lock(&iscsilun->mutex);
+ if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) {
+ qemu_mutex_unlock(&iscsilun->mutex);
+ g_free(iTask->err_str);
+ g_free(iTask);
+ return -ENOTCONN;
+ }
retry:
if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
- 0, iscsi_co_generic_cb, &iTask) == NULL) {
+ 0, iscsi_co_generic_cb, iTask) == NULL) {
qemu_mutex_unlock(&iscsilun->mutex);
+ g_free(iTask);
return -ENOMEM;
}
+ iscsi_set_events(iscsilun);
+ if (!iTask->on_list) {
+ QTAILQ_INSERT_TAIL(&iscsilun->inflight, iTask, entry);
+ iTask->on_list = true;
+ }
+ iscsi_arm_deadline(iTask);
+ iscsi_co_wait_for_task(iTask, iscsilun);
- iscsi_co_wait_for_task(&iTask, iscsilun);
-
- if (iTask.task != NULL) {
- scsi_free_scsi_task(iTask.task);
- iTask.task = NULL;
+ if (!iTask->hard_timed_out && iTask->task != NULL) {
+ scsi_free_scsi_task(iTask->task);
+ iTask->task = NULL;
}
- if (iTask.do_retry) {
- iTask.complete = 0;
+ if (iTask->do_retry) {
+ iTask->complete = 0;
goto retry;
}
- if (iTask.status != SCSI_STATUS_GOOD) {
- error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask.err_str);
- r = iTask.err_code;
+ if (iTask->hard_timed_out) {
+ r = iTask->err_code ? iTask->err_code : -ETIMEDOUT;
+ iTask->co = NULL; /* detach */
+ if (iTask->on_list) {
+ QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
+ iTask->on_list = false;
+ }
+ qemu_mutex_unlock(&iscsilun->mutex);
+ return r;
}
+ if (iTask->status != SCSI_STATUS_GOOD) {
+ error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask->err_str);
+ r = iTask->err_code;
+ }
+
+ if (iTask->on_list) {
+ QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
+ iTask->on_list = false;
+ }
qemu_mutex_unlock(&iscsilun->mutex);
- g_free(iTask.err_str);
+ g_free(iTask->err_str);
+ g_free(iTask);
return r;
}
@@ -1086,6 +1325,12 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
data.size = 0;
qemu_mutex_lock(&iscsilun->mutex);
+ if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) {
+ qemu_mutex_unlock(&iscsilun->mutex);
+ acb->status = -ENOTCONN;
+ iscsi_schedule_bh(acb);
+ return &acb->common;
+ }
if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
if (acb->ioh->iovec_count == 0) {
data.data = acb->ioh->dxferp;
@@ -1176,6 +1421,7 @@ retry:
goto out_unlock;
}
+ iscsi_set_events(iscsilun);
iscsi_co_wait_for_task(&iTask, iscsilun);
if (iTask.task != NULL) {
@@ -1282,6 +1528,7 @@ retry:
return -ENOMEM;
}
+ iscsi_set_events(iscsilun);
iscsi_co_wait_for_task(&iTask, iscsilun);
if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
@@ -1415,14 +1662,24 @@ static void iscsi_nop_timed_event(void *opaque)
IscsiLun *iscsilun = opaque;
QEMU_LOCK_GUARD(&iscsilun->mutex);
+ /* If we are not logged in, use the nop timer as an additional reconnect driver. */
+ if (!iscsi_is_logged_in(iscsilun->iscsi)) {
+ iscsilun->request_timed_out = true;
+ iscsi_maybe_reconnect(iscsilun);
+ goto rearm;
+ }
if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
error_report("iSCSI: NOP timeout. Reconnecting...");
iscsilun->request_timed_out = true;
} else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
- error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
- return;
+ /* Do NOT disable NOPs; treat as connection problem and try to reconnect. */
+ error_report("iSCSI: failed to send NOP-Out. Triggering reconnect.");
+ iscsilun->request_timed_out = true;
+ iscsi_maybe_reconnect(iscsilun);
+ /* keep NOPs enabled; next tick will try again */
}
+rearm:
timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
iscsi_set_events(iscsilun);
}
@@ -1559,6 +1816,8 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
IscsiLun *iscsilun = bs->opaque;
iscsilun->aio_context = new_context;
+ iscsilun->next_reconnect_ms = 0;
+ iscsilun->last_logged_in = iscsi_is_logged_in(iscsilun->iscsi);
iscsi_set_events(iscsilun);
/* Set up a timer for sending out iSCSI NOPs */
@@ -1894,6 +2153,9 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
warn_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
}
#endif
+ /* FORCE-ON policy: 5s hard timeout */
+ iscsilun->io_hard_timeout_ms = 5000; /* 5 seconds */
+ iscsilun->fail_fast = true;
if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
@@ -1905,6 +2167,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
iscsilun->iscsi = iscsi;
iscsilun->aio_context = bdrv_get_aio_context(bs);
iscsilun->lun = lun;
+ iscsilun->next_reconnect_ms = 0;
+ iscsilun->last_logged_in = false; /* updated after connect */
iscsilun->has_write_same = true;
task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
@@ -2007,6 +2271,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
qemu_mutex_init(&iscsilun->mutex);
iscsi_attach_aio_context(bs, iscsilun->aio_context);
+ iscsilun->last_logged_in = iscsi_is_logged_in(iscsilun->iscsi);
+ QTAILQ_INIT(&iscsilun->inflight);
/* Guess the internal cluster (page) size of the iscsi target by the means
* of opt_unmap_gran. Transfer the unmap granularity only if it has a
@@ -2387,6 +2653,7 @@ retry:
goto out_unlock;
}
+ iscsi_set_events(dst_lun);
iscsi_co_wait_for_task(&iscsi_task, dst_lun);
if (iscsi_task.do_retry) {
--
2.25.1
I would like to withdraw this patch series. Sorry for the inconvenience, and thank you for your understanding. CJ Chen <cjchen@igel.co.jp> 於 2025年9月1日 週一 下午6:20寫道: > > When an iSCSI session drops or an I/O stalls past a hard timeout, > requests could complete after teardown and coroutines might be woken > in an invalid state. This patch clarifies task ownership and hardens > completion paths. > > Changes: > - Add a per-command deadline (I/O hard timeout). On expiry, cancel > the libiscsi task, detach the coroutine (co = NULL), and let the > callback free the task safely. > - Track inflight tasks and fail them promptly on session disconnect > when fail-fast is enabled. Throttle reconnect attempts and drive > them via the periodic timer and NOP keepalives. > - Always refresh the fd event mask after state changes. Tidy event, > read, and write handlers, and remove unused labels/duplicates. > - Arm deadlines only for heap-allocated tasks (read/write/flush). > Stack-based helper paths continue to wait synchronously without > deadlines. > > User-visible effect: under error conditions we now return -ETIMEDOUT > or -ENOTCONN instead of hanging or crashing. Normal I/O behavior is > unchanged. Internally, a 5s hard timeout with fail-fast is enforced to > avoid indefinite stalls. > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3067 > > Signed-off-by: CJ Chen <cjchen@igel.co.jp> > --- > block/iscsi.c | 449 ++++++++++++++++++++++++++++++++++++++++---------- > 1 file changed, 358 insertions(+), 91 deletions(-) > > diff --git a/block/iscsi.c b/block/iscsi.c > index 15b96ee880..094b51c47c 100644 > --- a/block/iscsi.c > +++ b/block/iscsi.c > @@ -49,6 +49,7 @@ > #include "crypto/secret.h" > #include "scsi/utils.h" > #include "trace.h" > +#include "qemu/timer.h" > > /* Conflict between scsi/utils.h and libiscsi! 
:( */ > #define SCSI_XFER_NONE ISCSI_XFER_NONE > @@ -74,6 +75,8 @@ typedef struct IscsiLun { > QEMUTimer *nop_timer; > QEMUTimer *event_timer; > QemuMutex mutex; > + int64_t next_reconnect_ms; /* throttle repeated reconnects */ > + bool last_logged_in; /* for state-change logging */ > struct scsi_inquiry_logical_block_provisioning lbp; > struct scsi_inquiry_block_limits bl; > struct scsi_inquiry_device_designator *dd; > @@ -103,6 +106,9 @@ typedef struct IscsiLun { > bool dpofua; > bool has_write_same; > bool request_timed_out; > + uint32_t io_hard_timeout_ms; > + bool fail_fast; > + QTAILQ_HEAD(, IscsiTask) inflight; > } IscsiLun; > > typedef struct IscsiTask { > @@ -116,6 +122,12 @@ typedef struct IscsiTask { > QEMUTimer retry_timer; > int err_code; > char *err_str; > + QEMUTimer deadline_timer; > + bool deadline_armed; > + bool hard_timed_out; > + int64_t first_submit_ms; > + QTAILQ_ENTRY(IscsiTask) entry; > + bool on_list; > } IscsiTask; > > typedef struct IscsiAIOCB { > @@ -185,7 +197,9 @@ static void iscsi_co_generic_bh_cb(void *opaque) > struct IscsiTask *iTask = opaque; > > iTask->complete = 1; > - aio_co_wake(iTask->co); > + if (iTask->co) { > + aio_co_wake(iTask->co); > + } > } > > static void iscsi_retry_timer_expired(void *opaque) > @@ -232,75 +246,148 @@ static int iscsi_translate_sense(struct scsi_sense *sense) > sense->ascq & 0xFF); > } > > +static void iscsi_fail_inflight(IscsiLun *s, int err) > +{ > + IscsiTask *it, *next; > + int n = 0; > + > + QTAILQ_FOREACH_SAFE(it, &s->inflight, entry, next) { > + if (it->deadline_armed) { > + timer_del(&it->deadline_timer); > + it->deadline_armed = false; > + } > + it->err_code = err ? 
err : -EIO; > + it->hard_timed_out = true; > + it->status = SCSI_STATUS_TIMEOUT; > + > + if (it->task) { > + iscsi_scsi_cancel_task(s->iscsi, it->task); > + } > + > + if (it->co) { > + replay_bh_schedule_oneshot_event(s->aio_context, > + iscsi_co_generic_bh_cb, it); > + } else { > + it->complete = 1; > + } > + QTAILQ_REMOVE(&s->inflight, it, entry); > + it->on_list = false; > + n++; > + } > +} > + > /* Called (via iscsi_service) with QemuMutex held. */ > static void > iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, > - void *command_data, void *opaque) > + void *command_data, void *opaque) > { > struct IscsiTask *iTask = opaque; > struct scsi_task *task = command_data; > > + if (iTask->deadline_armed) { > + timer_del(&iTask->deadline_timer); > + iTask->deadline_armed = false; > + } > + > iTask->status = status; > iTask->do_retry = 0; > iTask->err_code = 0; > iTask->task = task; > > if (status != SCSI_STATUS_GOOD) { > - iTask->err_code = -EIO; > - if (iTask->retries++ < ISCSI_CMD_RETRIES) { > - if (status == SCSI_STATUS_BUSY || > - status == SCSI_STATUS_TIMEOUT || > - status == SCSI_STATUS_TASK_SET_FULL) { > - unsigned retry_time = > - exp_random(iscsi_retry_times[iTask->retries - 1]); > - if (status == SCSI_STATUS_TIMEOUT) { > - /* make sure the request is rescheduled AFTER the > - * reconnect is initiated */ > - retry_time = EVENT_INTERVAL * 2; > - iTask->iscsilun->request_timed_out = true; > - } > - error_report("iSCSI Busy/TaskSetFull/TimeOut" > - " (retry #%u in %u ms): %s", > - iTask->retries, retry_time, > - iscsi_get_error(iscsi)); > - aio_timer_init(iTask->iscsilun->aio_context, > - &iTask->retry_timer, QEMU_CLOCK_REALTIME, > - SCALE_MS, iscsi_retry_timer_expired, iTask); > - timer_mod(&iTask->retry_timer, > - qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time); > - iTask->do_retry = 1; > - return; > - } else if (status == SCSI_STATUS_CHECK_CONDITION) { > - int error = iscsi_translate_sense(&task->sense); > - if (error == EAGAIN) { > - 
error_report("iSCSI CheckCondition: %s", > + if (iTask->hard_timed_out) { > + iTask->err_code = -ETIMEDOUT; > + iTask->err_str = g_strdup("iSCSI hard timeout"); > + iTask->do_retry = 0; > + } else { > + iTask->err_code = -EIO; > + if (iTask->retries++ < ISCSI_CMD_RETRIES) { > + if (status == SCSI_STATUS_BUSY || > + status == SCSI_STATUS_TIMEOUT || > + status == SCSI_STATUS_TASK_SET_FULL) { > + unsigned retry_time = > + exp_random(iscsi_retry_times[iTask->retries - 1]); > + if (status == SCSI_STATUS_TIMEOUT) { > + /* > + * make sure the request is rescheduled AFTER the > + * reconnect is initiated > + */ > + retry_time = EVENT_INTERVAL * 2; > + iTask->iscsilun->request_timed_out = true; > + } > + error_report("iSCSI Busy/TaskSetFull/TimeOut" > + " (retry #%u in %u ms): %s", > + iTask->retries, retry_time, > iscsi_get_error(iscsi)); > + aio_timer_init(iTask->iscsilun->aio_context, > + &iTask->retry_timer, QEMU_CLOCK_REALTIME, > + SCALE_MS, iscsi_retry_timer_expired, iTask); > + timer_mod(&iTask->retry_timer, > + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time); > iTask->do_retry = 1; > + return; > + } else if (status == SCSI_STATUS_CHECK_CONDITION) { > + int error = iscsi_translate_sense(&task->sense); > + if (error == EAGAIN) { > + error_report("iSCSI CheckCondition: %s", > + iscsi_get_error(iscsi)); > + iTask->do_retry = 1; > + } else { > + iTask->err_code = -error; > + iTask->err_str = g_strdup(iscsi_get_error(iscsi)); > + } > } else { > - iTask->err_code = -error; > + if (!iTask->err_str) { > + iTask->err_str = g_strdup(iscsi_get_error(iscsi)); > + } > + } > + } else { > + if (!iTask->err_str) { > iTask->err_str = g_strdup(iscsi_get_error(iscsi)); > } > } > } > } > - > if (iTask->co) { > replay_bh_schedule_oneshot_event(iTask->iscsilun->aio_context, > iscsi_co_generic_bh_cb, iTask); > } else { > iTask->complete = 1; > + if (iTask->task) { > + scsi_free_scsi_task(iTask->task); > + iTask->task = NULL; > + } > + g_free(iTask->err_str); > + g_free(iTask); > } > + 
> } > > static void coroutine_fn > iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask) > { > *iTask = (struct IscsiTask) { > - .co = qemu_coroutine_self(), > - .iscsilun = iscsilun, > + .co = qemu_coroutine_self(), > + .iscsilun = iscsilun, > + .deadline_armed = false, > + .hard_timed_out = false, > + .first_submit_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME), > + .on_list = false, > }; > } > > +static IscsiTask * coroutine_fn iscsi_task_new(IscsiLun *iscsilun) > +{ > + IscsiTask *iTask = g_new0(IscsiTask, 1); > + iTask->co = qemu_coroutine_self(); > + iTask->iscsilun = iscsilun; > + iTask->deadline_armed = false; > + iTask->hard_timed_out = false; > + iTask->first_submit_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); > + iTask->on_list = false; > + return iTask; > +} > + > #ifdef __linux__ > > /* Called (via iscsi_service) with QemuMutex held. */ > @@ -371,17 +458,85 @@ iscsi_set_events(IscsiLun *iscsilun) > } > } > > +/* Try to (re)connect, but throttle to avoid storms. */ > +static void iscsi_maybe_reconnect(IscsiLun *iscsilun) > +{ > + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); > + if (now < iscsilun->next_reconnect_ms) { > + return; > + } > + iscsi_reconnect(iscsilun->iscsi); > + iscsilun->next_reconnect_ms = now + 2000; /* 2s throttle */ > + /* After changing connection state, refresh event mask immediately. 
*/ > + iscsi_set_events(iscsilun); > +} > + > +static void iscsi_deadline_timer_expired(void *opaque) > +{ > + struct IscsiTask *iTask = opaque; > + IscsiLun *iscsilun = iTask->iscsilun; > + > + if (!iTask->deadline_armed) { > + return; > + } > + iTask->deadline_armed = false; > + iTask->hard_timed_out = true; > + iTask->status = SCSI_STATUS_TIMEOUT; > + iTask->err_code = -ETIMEDOUT; > + > + if (iscsilun) { > + qemu_mutex_lock(&iscsilun->mutex); > + if (iTask->task) { > + iscsi_scsi_cancel_task(iscsilun->iscsi, iTask->task); > + if (iTask->co) { > + replay_bh_schedule_oneshot_event(iscsilun->aio_context, > + iscsi_co_generic_bh_cb, iTask); > + } > + iscsi_set_events(iscsilun); > + } > + qemu_mutex_unlock(&iscsilun->mutex); > + } > +} > + > +static inline void iscsi_arm_deadline(struct IscsiTask *iTask) > +{ > + IscsiLun *iscsilun = iTask->iscsilun; > + > + if (!iscsilun->io_hard_timeout_ms || iTask->deadline_armed) { > + return; > + } > + aio_timer_init(iscsilun->aio_context, &iTask->deadline_timer, > + QEMU_CLOCK_REALTIME, SCALE_MS, > + iscsi_deadline_timer_expired, iTask); > + timer_mod(&iTask->deadline_timer, > + iTask->first_submit_ms + iscsilun->io_hard_timeout_ms); > + iTask->deadline_armed = true; > +} > + > static void iscsi_timed_check_events(void *opaque) > { > IscsiLun *iscsilun = opaque; > > WITH_QEMU_LOCK_GUARD(&iscsilun->mutex) { > + bool logged_in_before = iscsilun->last_logged_in; > + bool logged_in_now; > /* check for timed out requests */ > iscsi_service(iscsilun->iscsi, 0); > + logged_in_now = iscsi_is_logged_in(iscsilun->iscsi); > + if (logged_in_before != logged_in_now) { > + iscsilun->last_logged_in = logged_in_now; > + if (logged_in_before && !logged_in_now && iscsilun->fail_fast) { > + iscsi_fail_inflight(iscsilun, -ENOTCONN); > + } > + } > > if (iscsilun->request_timed_out) { > iscsilun->request_timed_out = false; > - iscsi_reconnect(iscsilun->iscsi); > + iscsi_maybe_reconnect(iscsilun); > + } > + > + if (!logged_in_now) { > + 
iscsi_maybe_reconnect(iscsilun); > } > > /* > @@ -605,7 +760,7 @@ iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, > QEMUIOVector *iov, int flags) > { > IscsiLun *iscsilun = bs->opaque; > - struct IscsiTask iTask; > + IscsiTask *iTask; > uint64_t lba; > uint32_t num_sectors; > bool fua = flags & BDRV_REQ_FUA; > @@ -624,21 +779,27 @@ iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, > > lba = sector_qemu2lun(sector_num, iscsilun); > num_sectors = sector_qemu2lun(nb_sectors, iscsilun); > - iscsi_co_init_iscsitask(iscsilun, &iTask); > + iTask = iscsi_task_new(iscsilun); > qemu_mutex_lock(&iscsilun->mutex); > + if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) { > + qemu_mutex_unlock(&iscsilun->mutex); > + g_free(iTask->err_str); > + g_free(iTask); > + return -ENOTCONN; > + } > retry: > if (iscsilun->use_16_for_rw) { > #if LIBISCSI_API_VERSION >= (20160603) > - iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba, > + iTask->task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba, > NULL, num_sectors * iscsilun->block_size, > iscsilun->block_size, 0, 0, fua, 0, 0, > - iscsi_co_generic_cb, &iTask, > + iscsi_co_generic_cb, iTask, > (struct scsi_iovec *)iov->iov, iov->niov); > } else { > - iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba, > + iTask->task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba, > NULL, num_sectors * iscsilun->block_size, > iscsilun->block_size, 0, 0, fua, 0, 0, > - iscsi_co_generic_cb, &iTask, > + iscsi_co_generic_cb, iTask, > (struct scsi_iovec *)iov->iov, iov->niov); > } > #else > @@ -653,41 +814,62 @@ retry: > iscsi_co_generic_cb, &iTask); > } > #endif > - if (iTask.task == NULL) { > + if (iTask->task == NULL) { > qemu_mutex_unlock(&iscsilun->mutex); > + g_free(iTask); > return -ENOMEM; > } > #if LIBISCSI_API_VERSION < (20160603) > scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov, > iov->niov); > 
#endif > - iscsi_co_wait_for_task(&iTask, iscsilun); > + iscsi_set_events(iscsilun); > + if (!iTask->on_list) { > + QTAILQ_INSERT_TAIL(&iscsilun->inflight, iTask, entry); > + iTask->on_list = true; > + } > + iscsi_arm_deadline(iTask); > + iscsi_co_wait_for_task(iTask, iscsilun); > > - if (iTask.task != NULL) { > - scsi_free_scsi_task(iTask.task); > - iTask.task = NULL; > + if (!iTask->hard_timed_out && iTask->task != NULL) { > + scsi_free_scsi_task(iTask->task); > + iTask->task = NULL; > } > > - if (iTask.do_retry) { > - iTask.complete = 0; > + if (iTask->do_retry) { > + iTask->complete = 0; > goto retry; > } > > - if (iTask.status != SCSI_STATUS_GOOD) { > + if (iTask->hard_timed_out) { > + r = iTask->err_code ? iTask->err_code : -ETIMEDOUT; > + iTask->co = NULL; > + if (iTask->on_list) { > + QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry); > + iTask->on_list = false; > + } > + qemu_mutex_unlock(&iscsilun->mutex); > + return r; > + } > + > + if (iTask->status != SCSI_STATUS_GOOD) { > iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE, > nb_sectors * BDRV_SECTOR_SIZE); > error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba, > - iTask.err_str); > - r = iTask.err_code; > - goto out_unlock; > + iTask->err_str); > + r = iTask->err_code; > + } else { > + iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, > + nb_sectors * BDRV_SECTOR_SIZE); > } > > - iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, > - nb_sectors * BDRV_SECTOR_SIZE); > - > -out_unlock: > + if (iTask->on_list) { > + QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry); > + iTask->on_list = false; > + } > qemu_mutex_unlock(&iscsilun->mutex); > - g_free(iTask.err_str); > + g_free(iTask->err_str); > + g_free(iTask); > return r; > } > > @@ -733,6 +915,8 @@ retry: > ret = -ENOMEM; > goto out_unlock; > } > + iscsi_arm_deadline(&iTask); > + iscsi_set_events(iscsilun); > iscsi_co_wait_for_task(&iTask, iscsilun); > > if (iTask.do_retry) { > @@ 
-801,7 +985,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, > QEMUIOVector *iov) > { > IscsiLun *iscsilun = bs->opaque; > - struct IscsiTask iTask; > + IscsiTask *iTask; > uint64_t lba; > uint32_t num_sectors; > int r = 0; > @@ -856,22 +1040,28 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, > lba = sector_qemu2lun(sector_num, iscsilun); > num_sectors = sector_qemu2lun(nb_sectors, iscsilun); > > - iscsi_co_init_iscsitask(iscsilun, &iTask); > + iTask = iscsi_task_new(iscsilun); > qemu_mutex_lock(&iscsilun->mutex); > + if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) { > + qemu_mutex_unlock(&iscsilun->mutex); > + g_free(iTask->err_str); > + g_free(iTask); > + return -ENOTCONN; > + } > retry: > if (iscsilun->use_16_for_rw) { > #if LIBISCSI_API_VERSION >= (20160603) > - iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba, > + iTask->task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba, > num_sectors * iscsilun->block_size, > iscsilun->block_size, 0, 0, 0, 0, 0, > - iscsi_co_generic_cb, &iTask, > + iscsi_co_generic_cb, iTask, > (struct scsi_iovec *)iov->iov, iov->niov); > } else { > - iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba, > + iTask->task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba, > num_sectors * iscsilun->block_size, > iscsilun->block_size, > 0, 0, 0, 0, 0, > - iscsi_co_generic_cb, &iTask, > + iscsi_co_generic_cb, iTask, > (struct scsi_iovec *)iov->iov, iov->niov); > } > #else > @@ -887,70 +1077,119 @@ retry: > iscsi_co_generic_cb, &iTask); > } > #endif > - if (iTask.task == NULL) { > + if (iTask->task == NULL) { > qemu_mutex_unlock(&iscsilun->mutex); > + g_free(iTask); > return -ENOMEM; > } > #if LIBISCSI_API_VERSION < (20160603) > scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov); > #endif > - > - iscsi_co_wait_for_task(&iTask, iscsilun); > - if (iTask.task != NULL) { > - scsi_free_scsi_task(iTask.task); > - 
iTask.task = NULL; > + if (!iTask->on_list) { > + QTAILQ_INSERT_TAIL(&iscsilun->inflight, iTask, entry); > + iTask->on_list = true; > + } > + iscsi_arm_deadline(iTask); > + iscsi_co_wait_for_task(iTask, iscsilun); > + if (!iTask->hard_timed_out && iTask->task != NULL) { > + scsi_free_scsi_task(iTask->task); > + iTask->task = NULL; > } > > - if (iTask.do_retry) { > - iTask.complete = 0; > + if (iTask->do_retry) { > + iTask->complete = 0; > goto retry; > } > > - if (iTask.status != SCSI_STATUS_GOOD) { > + if (iTask->hard_timed_out) { > + r = iTask->err_code ? iTask->err_code : -ETIMEDOUT; > + iTask->co = NULL; > + if (iTask->on_list) { > + QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry); > + iTask->on_list = false; > + } > + qemu_mutex_unlock(&iscsilun->mutex); > + return r; > + } > + > + if (iTask->status != SCSI_STATUS_GOOD) { > error_report("iSCSI READ10/16 failed at lba %" PRIu64 ": %s", > - lba, iTask.err_str); > - r = iTask.err_code; > + lba, iTask->err_str); > + r = iTask->err_code; > } > > + if (iTask->on_list) { > + QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry); > + iTask->on_list = false; > + } > qemu_mutex_unlock(&iscsilun->mutex); > - g_free(iTask.err_str); > + g_free(iTask->err_str); > + g_free(iTask); > return r; > } > > static int coroutine_fn iscsi_co_flush(BlockDriverState *bs) > { > IscsiLun *iscsilun = bs->opaque; > - struct IscsiTask iTask; > + IscsiTask *iTask; > int r = 0; > > - iscsi_co_init_iscsitask(iscsilun, &iTask); > + iTask = iscsi_task_new(iscsilun); > qemu_mutex_lock(&iscsilun->mutex); > + if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) { > + qemu_mutex_unlock(&iscsilun->mutex); > + g_free(iTask->err_str); > + g_free(iTask); > + return -ENOTCONN; > + } > retry: > if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0, > - 0, iscsi_co_generic_cb, &iTask) == NULL) { > + 0, iscsi_co_generic_cb, iTask) == NULL) { > qemu_mutex_unlock(&iscsilun->mutex); > + g_free(iTask); > return -ENOMEM; > } > + 
iscsi_set_events(iscsilun); > + if (!iTask->on_list) { > + QTAILQ_INSERT_TAIL(&iscsilun->inflight, iTask, entry); > + iTask->on_list = true; > + } > + iscsi_arm_deadline(iTask); > + iscsi_co_wait_for_task(iTask, iscsilun); > > - iscsi_co_wait_for_task(&iTask, iscsilun); > - > - if (iTask.task != NULL) { > - scsi_free_scsi_task(iTask.task); > - iTask.task = NULL; > + if (!iTask->hard_timed_out && iTask->task != NULL) { > + scsi_free_scsi_task(iTask->task); > + iTask->task = NULL; > } > > - if (iTask.do_retry) { > - iTask.complete = 0; > + if (iTask->do_retry) { > + iTask->complete = 0; > goto retry; > } > > - if (iTask.status != SCSI_STATUS_GOOD) { > - error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask.err_str); > - r = iTask.err_code; > + if (iTask->hard_timed_out) { > + r = iTask->err_code ? iTask->err_code : -ETIMEDOUT; > + iTask->co = NULL; /* detach */ > + if (iTask->on_list) { > + QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry); > + iTask->on_list = false; > + } > + qemu_mutex_unlock(&iscsilun->mutex); > + return r; > } > > + if (iTask->status != SCSI_STATUS_GOOD) { > + error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask->err_str); > + r = iTask->err_code; > + } > + > + if (iTask->on_list) { > + QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry); > + iTask->on_list = false; > + } > qemu_mutex_unlock(&iscsilun->mutex); > - g_free(iTask.err_str); > + g_free(iTask->err_str); > + g_free(iTask); > return r; > } > > @@ -1086,6 +1325,12 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, > > data.size = 0; > qemu_mutex_lock(&iscsilun->mutex); > + if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) { > + qemu_mutex_unlock(&iscsilun->mutex); > + acb->status = -ENOTCONN; > + iscsi_schedule_bh(acb); > + return &acb->common; > + } > if (acb->task->xfer_dir == SCSI_XFER_WRITE) { > if (acb->ioh->iovec_count == 0) { > data.data = acb->ioh->dxferp; > @@ -1176,6 +1421,7 @@ retry: > goto out_unlock; > } > > + iscsi_set_events(iscsilun); > 
iscsi_co_wait_for_task(&iTask, iscsilun);
>
>      if (iTask.task != NULL) {
> @@ -1282,6 +1528,7 @@ retry:
>          return -ENOMEM;
>      }
>
> +    iscsi_set_events(iscsilun);
>      iscsi_co_wait_for_task(&iTask, iscsilun);
>
>      if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
> @@ -1415,14 +1662,24 @@ static void iscsi_nop_timed_event(void *opaque)
>      IscsiLun *iscsilun = opaque;
>
>      QEMU_LOCK_GUARD(&iscsilun->mutex);
> +    /* If we are not logged in, use the nop timer as an additional reconnect driver. */
> +    if (!iscsi_is_logged_in(iscsilun->iscsi)) {
> +        iscsilun->request_timed_out = true;
> +        iscsi_maybe_reconnect(iscsilun);
> +        goto rearm;
> +    }
>      if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
>          error_report("iSCSI: NOP timeout. Reconnecting...");
>          iscsilun->request_timed_out = true;
>      } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
> -        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
> -        return;
> +        /* Do NOT disable NOPs; treat as connection problem and try to reconnect. */
> +        error_report("iSCSI: failed to send NOP-Out. Triggering reconnect.");
> +        iscsilun->request_timed_out = true;
> +        iscsi_maybe_reconnect(iscsilun);
> +        /* keep NOPs enabled; next tick will try again */
>      }
>
> +rearm:
>      timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
>      iscsi_set_events(iscsilun);
>  }
> @@ -1559,6 +1816,8 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
>      IscsiLun *iscsilun = bs->opaque;
>
>      iscsilun->aio_context = new_context;
> +    iscsilun->next_reconnect_ms = 0;
> +    iscsilun->last_logged_in = iscsi_is_logged_in(iscsilun->iscsi);
>      iscsi_set_events(iscsilun);
>
>      /* Set up a timer for sending out iSCSI NOPs */
> @@ -1894,6 +2153,9 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
>          warn_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
>      }
>  #endif
> +    /* FORCE-ON policy: 5s hard timeout */
> +    iscsilun->io_hard_timeout_ms = 5000; /* 5 seconds */
> +    iscsilun->fail_fast = true;
>
>      if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
>          error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
> @@ -1905,6 +2167,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
>      iscsilun->iscsi = iscsi;
>      iscsilun->aio_context = bdrv_get_aio_context(bs);
>      iscsilun->lun = lun;
> +    iscsilun->next_reconnect_ms = 0;
> +    iscsilun->last_logged_in = false; /* updated after connect */
>      iscsilun->has_write_same = true;
>
>      task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
> @@ -2007,6 +2271,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
>
>      qemu_mutex_init(&iscsilun->mutex);
>      iscsi_attach_aio_context(bs, iscsilun->aio_context);
> +    iscsilun->last_logged_in = iscsi_is_logged_in(iscsilun->iscsi);
> +    QTAILQ_INIT(&iscsilun->inflight);
>
>      /* Guess the internal cluster (page) size of the iscsi target by the means
>       * of opt_unmap_gran.
Transfer the unmap granularity only if it has a
> @@ -2387,6 +2653,7 @@ retry:
>          goto out_unlock;
>      }
>
> +    iscsi_set_events(dst_lun);
>      iscsi_co_wait_for_task(&iscsi_task, dst_lun);
>
>      if (iscsi_task.do_retry) {
> --
> 2.25.1
>
© 2016 - 2025 Red Hat, Inc.