[PATCH] block/iscsi: fix crashes on session disconnect and I/O hard timeout

CJ Chen posted 1 patch 2 weeks, 6 days ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20250901092043.28722-1-cjchen@igel.co.jp
Maintainers: Ronnie Sahlberg <ronniesahlberg@gmail.com>, Paolo Bonzini <pbonzini@redhat.com>, Peter Lieven <pl@dlhnet.de>, Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>
block/iscsi.c | 449 ++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 358 insertions(+), 91 deletions(-)
[PATCH] block/iscsi: fix crashes on session disconnect and I/O hard timeout
Posted by CJ Chen 2 weeks, 6 days ago
When an iSCSI session drops or an I/O stalls past a hard timeout,
requests could complete after teardown and coroutines might be woken
in an invalid state. This patch clarifies task ownership and hardens
completion paths.

Changes:
 - Add a per-command deadline (I/O hard timeout). On expiry, cancel
   the libiscsi task, detach the coroutine (co = NULL), and let the
   callback free the task safely.
 - Track inflight tasks and fail them promptly on session disconnect
   when fail-fast is enabled. Throttle reconnect attempts and drive
   them via the periodic timer and NOP keepalives.
 - Always refresh the fd event mask after state changes. Tidy event,
   read, and write handlers, and remove unused labels/duplicates.
 - Arm deadlines only for heap-allocated tasks (read/write/flush).
   Stack-based helper paths continue to wait synchronously without
   deadlines.

User-visible effect: under error conditions we now return -ETIMEDOUT
or -ENOTCONN instead of hanging or crashing. Normal I/O behavior is
unchanged. Internally, a 5s hard timeout with fail-fast is enforced to
avoid indefinite stalls.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3067

Signed-off-by: CJ Chen <cjchen@igel.co.jp>
---
 block/iscsi.c | 449 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 358 insertions(+), 91 deletions(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index 15b96ee880..094b51c47c 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -49,6 +49,7 @@
 #include "crypto/secret.h"
 #include "scsi/utils.h"
 #include "trace.h"
+#include "qemu/timer.h"
 
 /* Conflict between scsi/utils.h and libiscsi! :( */
 #define SCSI_XFER_NONE ISCSI_XFER_NONE
@@ -74,6 +75,8 @@ typedef struct IscsiLun {
     QEMUTimer *nop_timer;
     QEMUTimer *event_timer;
     QemuMutex mutex;
+    int64_t next_reconnect_ms;   /* throttle repeated reconnects */
+    bool last_logged_in;         /* for state-change logging */
     struct scsi_inquiry_logical_block_provisioning lbp;
     struct scsi_inquiry_block_limits bl;
     struct scsi_inquiry_device_designator *dd;
@@ -103,6 +106,9 @@ typedef struct IscsiLun {
     bool dpofua;
     bool has_write_same;
     bool request_timed_out;
+    uint32_t io_hard_timeout_ms;
+    bool fail_fast;
+    QTAILQ_HEAD(, IscsiTask) inflight;
 } IscsiLun;
 
 typedef struct IscsiTask {
@@ -116,6 +122,12 @@ typedef struct IscsiTask {
     QEMUTimer retry_timer;
     int err_code;
     char *err_str;
+    QEMUTimer deadline_timer;
+    bool deadline_armed;
+    bool hard_timed_out;
+    int64_t first_submit_ms;
+    QTAILQ_ENTRY(IscsiTask) entry;
+    bool on_list;
 } IscsiTask;
 
 typedef struct IscsiAIOCB {
@@ -185,7 +197,9 @@ static void iscsi_co_generic_bh_cb(void *opaque)
     struct IscsiTask *iTask = opaque;
 
     iTask->complete = 1;
-    aio_co_wake(iTask->co);
+    if (iTask->co) {
+        aio_co_wake(iTask->co);
+    }
 }
 
 static void iscsi_retry_timer_expired(void *opaque)
@@ -232,75 +246,148 @@ static int iscsi_translate_sense(struct scsi_sense *sense)
                                sense->ascq & 0xFF);
 }
 
+static void iscsi_fail_inflight(IscsiLun *s, int err)
+{
+    IscsiTask *it, *next;
+    int n = 0;
+
+    QTAILQ_FOREACH_SAFE(it, &s->inflight, entry, next) {
+        if (it->deadline_armed) {
+            timer_del(&it->deadline_timer);
+            it->deadline_armed = false;
+        }
+        it->err_code = err ? err : -EIO;
+        it->hard_timed_out = true;
+        it->status = SCSI_STATUS_TIMEOUT;
+
+        if (it->task) {
+            iscsi_scsi_cancel_task(s->iscsi, it->task);
+        }
+
+        if (it->co) {
+            replay_bh_schedule_oneshot_event(s->aio_context,
+                                             iscsi_co_generic_bh_cb, it);
+        } else {
+            it->complete = 1;
+        }
+        QTAILQ_REMOVE(&s->inflight, it, entry);
+        it->on_list = false;
+        n++;
+    }
+}
+
 /* Called (via iscsi_service) with QemuMutex held.  */
 static void
 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
-                        void *command_data, void *opaque)
+                    void *command_data, void *opaque)
 {
     struct IscsiTask *iTask = opaque;
     struct scsi_task *task = command_data;
 
+    if (iTask->deadline_armed) {
+        timer_del(&iTask->deadline_timer);
+        iTask->deadline_armed = false;
+    }
+
     iTask->status = status;
     iTask->do_retry = 0;
     iTask->err_code = 0;
     iTask->task = task;
 
     if (status != SCSI_STATUS_GOOD) {
-        iTask->err_code = -EIO;
-        if (iTask->retries++ < ISCSI_CMD_RETRIES) {
-            if (status == SCSI_STATUS_BUSY ||
-                status == SCSI_STATUS_TIMEOUT ||
-                status == SCSI_STATUS_TASK_SET_FULL) {
-                unsigned retry_time =
-                    exp_random(iscsi_retry_times[iTask->retries - 1]);
-                if (status == SCSI_STATUS_TIMEOUT) {
-                    /* make sure the request is rescheduled AFTER the
-                     * reconnect is initiated */
-                    retry_time = EVENT_INTERVAL * 2;
-                    iTask->iscsilun->request_timed_out = true;
-                }
-                error_report("iSCSI Busy/TaskSetFull/TimeOut"
-                             " (retry #%u in %u ms): %s",
-                             iTask->retries, retry_time,
-                             iscsi_get_error(iscsi));
-                aio_timer_init(iTask->iscsilun->aio_context,
-                               &iTask->retry_timer, QEMU_CLOCK_REALTIME,
-                               SCALE_MS, iscsi_retry_timer_expired, iTask);
-                timer_mod(&iTask->retry_timer,
-                          qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
-                iTask->do_retry = 1;
-                return;
-            } else if (status == SCSI_STATUS_CHECK_CONDITION) {
-                int error = iscsi_translate_sense(&task->sense);
-                if (error == EAGAIN) {
-                    error_report("iSCSI CheckCondition: %s",
+        if (iTask->hard_timed_out) {
+            iTask->err_code = -ETIMEDOUT;
+            iTask->err_str = g_strdup("iSCSI hard timeout");
+            iTask->do_retry = 0;
+        } else {
+            iTask->err_code = -EIO;
+            if (iTask->retries++ < ISCSI_CMD_RETRIES) {
+                if (status == SCSI_STATUS_BUSY ||
+                    status == SCSI_STATUS_TIMEOUT ||
+                    status == SCSI_STATUS_TASK_SET_FULL) {
+                    unsigned retry_time =
+                        exp_random(iscsi_retry_times[iTask->retries - 1]);
+                    if (status == SCSI_STATUS_TIMEOUT) {
+                        /*
+                         * make sure the request is rescheduled AFTER the
+                         * reconnect is initiated
+                         */
+                        retry_time = EVENT_INTERVAL * 2;
+                        iTask->iscsilun->request_timed_out = true;
+                    }
+                    error_report("iSCSI Busy/TaskSetFull/TimeOut"
+                                 " (retry #%u in %u ms): %s",
+                                 iTask->retries, retry_time,
                                  iscsi_get_error(iscsi));
+                    aio_timer_init(iTask->iscsilun->aio_context,
+                                   &iTask->retry_timer, QEMU_CLOCK_REALTIME,
+                                   SCALE_MS, iscsi_retry_timer_expired, iTask);
+                    timer_mod(&iTask->retry_timer,
+                              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
                     iTask->do_retry = 1;
+                    return;
+                } else if (status == SCSI_STATUS_CHECK_CONDITION) {
+                    int error = iscsi_translate_sense(&task->sense);
+                    if (error == EAGAIN) {
+                        error_report("iSCSI CheckCondition: %s",
+                                     iscsi_get_error(iscsi));
+                        iTask->do_retry = 1;
+                    } else {
+                        iTask->err_code = -error;
+                        iTask->err_str = g_strdup(iscsi_get_error(iscsi));
+                    }
                 } else {
-                    iTask->err_code = -error;
+                    if (!iTask->err_str) {
+                        iTask->err_str = g_strdup(iscsi_get_error(iscsi));
+                    }
+                }
+            } else {
+                if (!iTask->err_str) {
                     iTask->err_str = g_strdup(iscsi_get_error(iscsi));
                 }
             }
         }
     }
-
     if (iTask->co) {
         replay_bh_schedule_oneshot_event(iTask->iscsilun->aio_context,
                                          iscsi_co_generic_bh_cb, iTask);
     } else {
         iTask->complete = 1;
+        if (iTask->task) {
+            scsi_free_scsi_task(iTask->task);
+            iTask->task = NULL;
+        }
+        g_free(iTask->err_str);
+        g_free(iTask);
     }
+
 }
 
 static void coroutine_fn
 iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
 {
     *iTask = (struct IscsiTask) {
-        .co         = qemu_coroutine_self(),
-        .iscsilun   = iscsilun,
+        .co              = qemu_coroutine_self(),
+        .iscsilun        = iscsilun,
+        .deadline_armed  = false,
+        .hard_timed_out  = false,
+        .first_submit_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME),
+        .on_list         = false,
     };
 }
 
+static IscsiTask * coroutine_fn iscsi_task_new(IscsiLun *iscsilun)
+{
+    IscsiTask *iTask = g_new0(IscsiTask, 1);
+    iTask->co              = qemu_coroutine_self();
+    iTask->iscsilun        = iscsilun;
+    iTask->deadline_armed  = false;
+    iTask->hard_timed_out  = false;
+    iTask->first_submit_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    iTask->on_list         = false;
+    return iTask;
+}
+
 #ifdef __linux__
 
 /* Called (via iscsi_service) with QemuMutex held. */
@@ -371,17 +458,85 @@ iscsi_set_events(IscsiLun *iscsilun)
     }
 }
 
+/* Try to (re)connect, but throttle to avoid storms. */
+static void iscsi_maybe_reconnect(IscsiLun *iscsilun)
+{
+    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    if (now < iscsilun->next_reconnect_ms) {
+        return;
+    }
+    iscsi_reconnect(iscsilun->iscsi);
+    iscsilun->next_reconnect_ms = now + 2000; /* 2s throttle */
+    /* After changing connection state, refresh event mask immediately. */
+    iscsi_set_events(iscsilun);
+}
+
+static void iscsi_deadline_timer_expired(void *opaque)
+{
+    struct IscsiTask *iTask = opaque;
+    IscsiLun *iscsilun = iTask->iscsilun;
+
+    if (!iTask->deadline_armed) {
+        return;
+    }
+    iTask->deadline_armed = false;
+    iTask->hard_timed_out = true;
+    iTask->status   = SCSI_STATUS_TIMEOUT;
+    iTask->err_code = -ETIMEDOUT;
+
+    if (iscsilun) {
+        qemu_mutex_lock(&iscsilun->mutex);
+        if (iTask->task) {
+            iscsi_scsi_cancel_task(iscsilun->iscsi, iTask->task);
+            if (iTask->co) {
+                replay_bh_schedule_oneshot_event(iscsilun->aio_context,
+                                                 iscsi_co_generic_bh_cb, iTask);
+            }
+            iscsi_set_events(iscsilun);
+        }
+        qemu_mutex_unlock(&iscsilun->mutex);
+    }
+}
+
+static inline void iscsi_arm_deadline(struct IscsiTask *iTask)
+{
+    IscsiLun *iscsilun = iTask->iscsilun;
+
+    if (!iscsilun->io_hard_timeout_ms || iTask->deadline_armed) {
+        return;
+    }
+    aio_timer_init(iscsilun->aio_context, &iTask->deadline_timer,
+                   QEMU_CLOCK_REALTIME, SCALE_MS,
+                   iscsi_deadline_timer_expired, iTask);
+    timer_mod(&iTask->deadline_timer,
+              iTask->first_submit_ms + iscsilun->io_hard_timeout_ms);
+    iTask->deadline_armed = true;
+}
+
 static void iscsi_timed_check_events(void *opaque)
 {
     IscsiLun *iscsilun = opaque;
 
     WITH_QEMU_LOCK_GUARD(&iscsilun->mutex) {
+        bool logged_in_before = iscsilun->last_logged_in;
+        bool logged_in_now;
         /* check for timed out requests */
         iscsi_service(iscsilun->iscsi, 0);
+        logged_in_now = iscsi_is_logged_in(iscsilun->iscsi);
+        if (logged_in_before != logged_in_now) {
+            iscsilun->last_logged_in = logged_in_now;
+            if (logged_in_before && !logged_in_now && iscsilun->fail_fast) {
+                iscsi_fail_inflight(iscsilun, -ENOTCONN);
+            }
+        }
 
         if (iscsilun->request_timed_out) {
             iscsilun->request_timed_out = false;
-            iscsi_reconnect(iscsilun->iscsi);
+            iscsi_maybe_reconnect(iscsilun);
+        }
+
+        if (!logged_in_now) {
+            iscsi_maybe_reconnect(iscsilun);
         }
 
         /*
@@ -605,7 +760,7 @@ iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                 QEMUIOVector *iov, int flags)
 {
     IscsiLun *iscsilun = bs->opaque;
-    struct IscsiTask iTask;
+    IscsiTask *iTask;
     uint64_t lba;
     uint32_t num_sectors;
     bool fua = flags & BDRV_REQ_FUA;
@@ -624,21 +779,27 @@ iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 
     lba = sector_qemu2lun(sector_num, iscsilun);
     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
-    iscsi_co_init_iscsitask(iscsilun, &iTask);
+    iTask = iscsi_task_new(iscsilun);
     qemu_mutex_lock(&iscsilun->mutex);
+    if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) {
+        qemu_mutex_unlock(&iscsilun->mutex);
+        g_free(iTask->err_str);
+        g_free(iTask);
+        return -ENOTCONN;
+    }
 retry:
     if (iscsilun->use_16_for_rw) {
 #if LIBISCSI_API_VERSION >= (20160603)
-        iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
+        iTask->task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                             NULL, num_sectors * iscsilun->block_size,
                                             iscsilun->block_size, 0, 0, fua, 0, 0,
-                                            iscsi_co_generic_cb, &iTask,
+                                            iscsi_co_generic_cb, iTask,
                                             (struct scsi_iovec *)iov->iov, iov->niov);
     } else {
-        iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
+        iTask->task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                             NULL, num_sectors * iscsilun->block_size,
                                             iscsilun->block_size, 0, 0, fua, 0, 0,
-                                            iscsi_co_generic_cb, &iTask,
+                                            iscsi_co_generic_cb, iTask,
                                             (struct scsi_iovec *)iov->iov, iov->niov);
     }
 #else
@@ -653,41 +814,62 @@ retry:
                                         iscsi_co_generic_cb, &iTask);
     }
 #endif
-    if (iTask.task == NULL) {
+    if (iTask->task == NULL) {
         qemu_mutex_unlock(&iscsilun->mutex);
+        g_free(iTask);
         return -ENOMEM;
     }
 #if LIBISCSI_API_VERSION < (20160603)
     scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
                           iov->niov);
 #endif
-    iscsi_co_wait_for_task(&iTask, iscsilun);
+    iscsi_set_events(iscsilun);
+    if (!iTask->on_list) {
+        QTAILQ_INSERT_TAIL(&iscsilun->inflight, iTask, entry);
+        iTask->on_list = true;
+    }
+    iscsi_arm_deadline(iTask);
+    iscsi_co_wait_for_task(iTask, iscsilun);
 
-    if (iTask.task != NULL) {
-        scsi_free_scsi_task(iTask.task);
-        iTask.task = NULL;
+    if (!iTask->hard_timed_out && iTask->task != NULL) {
+        scsi_free_scsi_task(iTask->task);
+        iTask->task = NULL;
     }
 
-    if (iTask.do_retry) {
-        iTask.complete = 0;
+    if (iTask->do_retry) {
+        iTask->complete = 0;
         goto retry;
     }
 
-    if (iTask.status != SCSI_STATUS_GOOD) {
+    if (iTask->hard_timed_out) {
+        r = iTask->err_code ? iTask->err_code : -ETIMEDOUT;
+        iTask->co = NULL;
+        if (iTask->on_list) {
+            QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
+            iTask->on_list = false;
+        }
+        qemu_mutex_unlock(&iscsilun->mutex);
+        return r;
+    }
+
+    if (iTask->status != SCSI_STATUS_GOOD) {
         iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
                                    nb_sectors * BDRV_SECTOR_SIZE);
         error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba,
-                     iTask.err_str);
-        r = iTask.err_code;
-        goto out_unlock;
+                     iTask->err_str);
+        r = iTask->err_code;
+    } else {
+        iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
+                                     nb_sectors * BDRV_SECTOR_SIZE);
     }
 
-    iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
-                                 nb_sectors * BDRV_SECTOR_SIZE);
-
-out_unlock:
+    if (iTask->on_list) {
+        QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
+        iTask->on_list = false;
+    }
     qemu_mutex_unlock(&iscsilun->mutex);
-    g_free(iTask.err_str);
+    g_free(iTask->err_str);
+    g_free(iTask);
     return r;
 }
 
@@ -733,6 +915,8 @@ retry:
         ret = -ENOMEM;
         goto out_unlock;
     }
+    iscsi_arm_deadline(&iTask);
+    iscsi_set_events(iscsilun);
     iscsi_co_wait_for_task(&iTask, iscsilun);
 
     if (iTask.do_retry) {
@@ -801,7 +985,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
                                        QEMUIOVector *iov)
 {
     IscsiLun *iscsilun = bs->opaque;
-    struct IscsiTask iTask;
+    IscsiTask *iTask;
     uint64_t lba;
     uint32_t num_sectors;
     int r = 0;
@@ -856,22 +1040,28 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
     lba = sector_qemu2lun(sector_num, iscsilun);
     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
 
-    iscsi_co_init_iscsitask(iscsilun, &iTask);
+    iTask = iscsi_task_new(iscsilun);
     qemu_mutex_lock(&iscsilun->mutex);
+    if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) {
+        qemu_mutex_unlock(&iscsilun->mutex);
+        g_free(iTask->err_str);
+        g_free(iTask);
+        return -ENOTCONN;
+    }
 retry:
     if (iscsilun->use_16_for_rw) {
 #if LIBISCSI_API_VERSION >= (20160603)
-        iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
+        iTask->task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            num_sectors * iscsilun->block_size,
                                            iscsilun->block_size, 0, 0, 0, 0, 0,
-                                           iscsi_co_generic_cb, &iTask,
+                                           iscsi_co_generic_cb, iTask,
                                            (struct scsi_iovec *)iov->iov, iov->niov);
     } else {
-        iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
+        iTask->task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            num_sectors * iscsilun->block_size,
                                            iscsilun->block_size,
                                            0, 0, 0, 0, 0,
-                                           iscsi_co_generic_cb, &iTask,
+                                           iscsi_co_generic_cb, iTask,
                                            (struct scsi_iovec *)iov->iov, iov->niov);
     }
 #else
@@ -887,70 +1077,119 @@ retry:
                                        iscsi_co_generic_cb, &iTask);
     }
 #endif
-    if (iTask.task == NULL) {
+    if (iTask->task == NULL) {
         qemu_mutex_unlock(&iscsilun->mutex);
+        g_free(iTask);
         return -ENOMEM;
     }
 #if LIBISCSI_API_VERSION < (20160603)
     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
 #endif
-
-    iscsi_co_wait_for_task(&iTask, iscsilun);
-    if (iTask.task != NULL) {
-        scsi_free_scsi_task(iTask.task);
-        iTask.task = NULL;
+    if (!iTask->on_list) {
+        QTAILQ_INSERT_TAIL(&iscsilun->inflight, iTask, entry);
+        iTask->on_list = true;
+    }
+    iscsi_arm_deadline(iTask);
+    iscsi_co_wait_for_task(iTask, iscsilun);
+    if (!iTask->hard_timed_out && iTask->task != NULL) {
+        scsi_free_scsi_task(iTask->task);
+        iTask->task = NULL;
     }
 
-    if (iTask.do_retry) {
-        iTask.complete = 0;
+    if (iTask->do_retry) {
+        iTask->complete = 0;
         goto retry;
     }
 
-    if (iTask.status != SCSI_STATUS_GOOD) {
+    if (iTask->hard_timed_out) {
+        r = iTask->err_code ? iTask->err_code : -ETIMEDOUT;
+        iTask->co = NULL;
+        if (iTask->on_list) {
+            QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
+            iTask->on_list = false;
+        }
+        qemu_mutex_unlock(&iscsilun->mutex);
+        return r;
+    }
+
+    if (iTask->status != SCSI_STATUS_GOOD) {
         error_report("iSCSI READ10/16 failed at lba %" PRIu64 ": %s",
-                     lba, iTask.err_str);
-        r = iTask.err_code;
+                     lba, iTask->err_str);
+        r = iTask->err_code;
     }
 
+    if (iTask->on_list) {
+        QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
+        iTask->on_list = false;
+    }
     qemu_mutex_unlock(&iscsilun->mutex);
-    g_free(iTask.err_str);
+    g_free(iTask->err_str);
+    g_free(iTask);
     return r;
 }
 
 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
 {
     IscsiLun *iscsilun = bs->opaque;
-    struct IscsiTask iTask;
+    IscsiTask *iTask;
     int r = 0;
 
-    iscsi_co_init_iscsitask(iscsilun, &iTask);
+    iTask = iscsi_task_new(iscsilun);
     qemu_mutex_lock(&iscsilun->mutex);
+    if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) {
+        qemu_mutex_unlock(&iscsilun->mutex);
+        g_free(iTask->err_str);
+        g_free(iTask);
+        return -ENOTCONN;
+    }
 retry:
     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
-                                      0, iscsi_co_generic_cb, &iTask) == NULL) {
+                                      0, iscsi_co_generic_cb, iTask) == NULL) {
         qemu_mutex_unlock(&iscsilun->mutex);
+        g_free(iTask);
         return -ENOMEM;
     }
+    iscsi_set_events(iscsilun);
+    if (!iTask->on_list) {
+        QTAILQ_INSERT_TAIL(&iscsilun->inflight, iTask, entry);
+        iTask->on_list = true;
+    }
+    iscsi_arm_deadline(iTask);
+    iscsi_co_wait_for_task(iTask, iscsilun);
 
-    iscsi_co_wait_for_task(&iTask, iscsilun);
-
-    if (iTask.task != NULL) {
-        scsi_free_scsi_task(iTask.task);
-        iTask.task = NULL;
+    if (!iTask->hard_timed_out && iTask->task != NULL) {
+        scsi_free_scsi_task(iTask->task);
+        iTask->task = NULL;
     }
 
-    if (iTask.do_retry) {
-        iTask.complete = 0;
+    if (iTask->do_retry) {
+        iTask->complete = 0;
         goto retry;
     }
 
-    if (iTask.status != SCSI_STATUS_GOOD) {
-        error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask.err_str);
-        r = iTask.err_code;
+    if (iTask->hard_timed_out) {
+        r = iTask->err_code ? iTask->err_code : -ETIMEDOUT;
+        iTask->co = NULL; /* detach */
+        if (iTask->on_list) {
+            QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
+            iTask->on_list = false;
+        }
+        qemu_mutex_unlock(&iscsilun->mutex);
+        return r;
     }
 
+    if (iTask->status != SCSI_STATUS_GOOD) {
+        error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask->err_str);
+        r = iTask->err_code;
+    }
+
+    if (iTask->on_list) {
+        QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
+        iTask->on_list = false;
+    }
     qemu_mutex_unlock(&iscsilun->mutex);
-    g_free(iTask.err_str);
+    g_free(iTask->err_str);
+    g_free(iTask);
     return r;
 }
 
@@ -1086,6 +1325,12 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
 
     data.size = 0;
     qemu_mutex_lock(&iscsilun->mutex);
+    if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) {
+        qemu_mutex_unlock(&iscsilun->mutex);
+        acb->status = -ENOTCONN;
+        iscsi_schedule_bh(acb);
+        return &acb->common;
+    }
     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
         if (acb->ioh->iovec_count == 0) {
             data.data = acb->ioh->dxferp;
@@ -1176,6 +1421,7 @@ retry:
         goto out_unlock;
     }
 
+    iscsi_set_events(iscsilun);
     iscsi_co_wait_for_task(&iTask, iscsilun);
 
     if (iTask.task != NULL) {
@@ -1282,6 +1528,7 @@ retry:
         return -ENOMEM;
     }
 
+    iscsi_set_events(iscsilun);
     iscsi_co_wait_for_task(&iTask, iscsilun);
 
     if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
@@ -1415,14 +1662,24 @@ static void iscsi_nop_timed_event(void *opaque)
     IscsiLun *iscsilun = opaque;
 
     QEMU_LOCK_GUARD(&iscsilun->mutex);
+    /* If we are not logged in, use the nop timer as an additional reconnect driver. */
+    if (!iscsi_is_logged_in(iscsilun->iscsi)) {
+        iscsilun->request_timed_out = true;
+        iscsi_maybe_reconnect(iscsilun);
+        goto rearm;
+    }
     if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
         error_report("iSCSI: NOP timeout. Reconnecting...");
         iscsilun->request_timed_out = true;
     } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
-        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
-        return;
+        /* Do NOT disable NOPs; treat as connection problem and try to reconnect. */
+        error_report("iSCSI: failed to send NOP-Out. Triggering reconnect.");
+        iscsilun->request_timed_out = true;
+        iscsi_maybe_reconnect(iscsilun);
+        /* keep NOPs enabled; next tick will try again */
     }
 
+rearm:
     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
     iscsi_set_events(iscsilun);
 }
@@ -1559,6 +1816,8 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
     IscsiLun *iscsilun = bs->opaque;
 
     iscsilun->aio_context = new_context;
+    iscsilun->next_reconnect_ms = 0;
+    iscsilun->last_logged_in = iscsi_is_logged_in(iscsilun->iscsi);
     iscsi_set_events(iscsilun);
 
     /* Set up a timer for sending out iSCSI NOPs */
@@ -1894,6 +2153,9 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
         warn_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
     }
 #endif
+    /* FORCE-ON policy: 5s hard timeout */
+    iscsilun->io_hard_timeout_ms = 5000; /* 5 seconds */
+    iscsilun->fail_fast = true;
 
     if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
@@ -1905,6 +2167,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     iscsilun->iscsi = iscsi;
     iscsilun->aio_context = bdrv_get_aio_context(bs);
     iscsilun->lun = lun;
+    iscsilun->next_reconnect_ms = 0;
+    iscsilun->last_logged_in = false; /* updated after connect */
     iscsilun->has_write_same = true;
 
     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
@@ -2007,6 +2271,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
 
     qemu_mutex_init(&iscsilun->mutex);
     iscsi_attach_aio_context(bs, iscsilun->aio_context);
+    iscsilun->last_logged_in = iscsi_is_logged_in(iscsilun->iscsi);
+    QTAILQ_INIT(&iscsilun->inflight);
 
     /* Guess the internal cluster (page) size of the iscsi target by the means
      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
@@ -2387,6 +2653,7 @@ retry:
         goto out_unlock;
     }
 
+    iscsi_set_events(dst_lun);
     iscsi_co_wait_for_task(&iscsi_task, dst_lun);
 
     if (iscsi_task.do_retry) {
-- 
2.25.1
[Withdrawn] [PATCH] block/iscsi: fix crashes on session disconnect and I/O hard timeout
Posted by chen CJ 2 weeks, 4 days ago
I would like to withdraw this patch.

Sorry for the inconvenience, and thank you for your understanding.

On Mon, Sep 1, 2025 at 6:20 PM CJ Chen <cjchen@igel.co.jp> wrote:
>
> When an iSCSI session drops or an I/O stalls past a hard timeout,
> requests could complete after teardown and coroutines might be woken
> in an invalid state. This patch clarifies task ownership and hardens
> completion paths.
>
> Changes:
>  - Add a per-command deadline (I/O hard timeout). On expiry, cancel
>    the libiscsi task, detach the coroutine (co = NULL), and let the
>    callback free the task safely.
>  - Track inflight tasks and fail them promptly on session disconnect
>    when fail-fast is enabled. Throttle reconnect attempts and drive
>    them via the periodic timer and NOP keepalives.
>  - Always refresh the fd event mask after state changes. Tidy event,
>    read, and write handlers, and remove unused labels/duplicates.
>  - Arm deadlines only for heap-allocated tasks (read/write/flush).
>    Stack-based helper paths continue to wait synchronously without
>    deadlines.
>
> User-visible effect: under error conditions we now return -ETIMEDOUT
> or -ENOTCONN instead of hanging or crashing. Normal I/O behavior is
> unchanged. Internally, a 5s hard timeout with fail-fast is enforced to
> avoid indefinite stalls.
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3067
>
> Signed-off-by: CJ Chen <cjchen@igel.co.jp>
> ---
>  block/iscsi.c | 449 ++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 358 insertions(+), 91 deletions(-)
>
> diff --git a/block/iscsi.c b/block/iscsi.c
> index 15b96ee880..094b51c47c 100644
> --- a/block/iscsi.c
> +++ b/block/iscsi.c
> @@ -49,6 +49,7 @@
>  #include "crypto/secret.h"
>  #include "scsi/utils.h"
>  #include "trace.h"
> +#include "qemu/timer.h"
>
>  /* Conflict between scsi/utils.h and libiscsi! :( */
>  #define SCSI_XFER_NONE ISCSI_XFER_NONE
> @@ -74,6 +75,8 @@ typedef struct IscsiLun {
>      QEMUTimer *nop_timer;
>      QEMUTimer *event_timer;
>      QemuMutex mutex;
> +    int64_t next_reconnect_ms;   /* throttle repeated reconnects */
> +    bool last_logged_in;         /* for state-change logging */
>      struct scsi_inquiry_logical_block_provisioning lbp;
>      struct scsi_inquiry_block_limits bl;
>      struct scsi_inquiry_device_designator *dd;
> @@ -103,6 +106,9 @@ typedef struct IscsiLun {
>      bool dpofua;
>      bool has_write_same;
>      bool request_timed_out;
> +    uint32_t io_hard_timeout_ms;
> +    bool fail_fast;
> +    QTAILQ_HEAD(, IscsiTask) inflight;
>  } IscsiLun;
>
>  typedef struct IscsiTask {
> @@ -116,6 +122,12 @@ typedef struct IscsiTask {
>      QEMUTimer retry_timer;
>      int err_code;
>      char *err_str;
> +    QEMUTimer deadline_timer;
> +    bool deadline_armed;
> +    bool hard_timed_out;
> +    int64_t first_submit_ms;
> +    QTAILQ_ENTRY(IscsiTask) entry;
> +    bool on_list;
>  } IscsiTask;
>
>  typedef struct IscsiAIOCB {
> @@ -185,7 +197,9 @@ static void iscsi_co_generic_bh_cb(void *opaque)
>      struct IscsiTask *iTask = opaque;
>
>      iTask->complete = 1;
> -    aio_co_wake(iTask->co);
> +    if (iTask->co) {
> +        aio_co_wake(iTask->co);
> +    }
>  }
>
>  static void iscsi_retry_timer_expired(void *opaque)
> @@ -232,75 +246,148 @@ static int iscsi_translate_sense(struct scsi_sense *sense)
>                                 sense->ascq & 0xFF);
>  }
>
> +static void iscsi_fail_inflight(IscsiLun *s, int err)
> +{
> +    IscsiTask *it, *next;
> +    int n = 0;
> +
> +    QTAILQ_FOREACH_SAFE(it, &s->inflight, entry, next) {
> +        if (it->deadline_armed) {
> +            timer_del(&it->deadline_timer);
> +            it->deadline_armed = false;
> +        }
> +        it->err_code = err ? err : -EIO;
> +        it->hard_timed_out = true;
> +        it->status = SCSI_STATUS_TIMEOUT;
> +
> +        if (it->task) {
> +            iscsi_scsi_cancel_task(s->iscsi, it->task);
> +        }
> +
> +        if (it->co) {
> +            replay_bh_schedule_oneshot_event(s->aio_context,
> +                                             iscsi_co_generic_bh_cb, it);
> +        } else {
> +            it->complete = 1;
> +        }
> +        QTAILQ_REMOVE(&s->inflight, it, entry);
> +        it->on_list = false;
> +        n++;
> +    }
> +}
> +
>  /* Called (via iscsi_service) with QemuMutex held.  */
>  static void
>  iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
> -                        void *command_data, void *opaque)
> +                    void *command_data, void *opaque)
>  {
>      struct IscsiTask *iTask = opaque;
>      struct scsi_task *task = command_data;
>
> +    if (iTask->deadline_armed) {
> +        timer_del(&iTask->deadline_timer);
> +        iTask->deadline_armed = false;
> +    }
> +
>      iTask->status = status;
>      iTask->do_retry = 0;
>      iTask->err_code = 0;
>      iTask->task = task;
>
>      if (status != SCSI_STATUS_GOOD) {
> -        iTask->err_code = -EIO;
> -        if (iTask->retries++ < ISCSI_CMD_RETRIES) {
> -            if (status == SCSI_STATUS_BUSY ||
> -                status == SCSI_STATUS_TIMEOUT ||
> -                status == SCSI_STATUS_TASK_SET_FULL) {
> -                unsigned retry_time =
> -                    exp_random(iscsi_retry_times[iTask->retries - 1]);
> -                if (status == SCSI_STATUS_TIMEOUT) {
> -                    /* make sure the request is rescheduled AFTER the
> -                     * reconnect is initiated */
> -                    retry_time = EVENT_INTERVAL * 2;
> -                    iTask->iscsilun->request_timed_out = true;
> -                }
> -                error_report("iSCSI Busy/TaskSetFull/TimeOut"
> -                             " (retry #%u in %u ms): %s",
> -                             iTask->retries, retry_time,
> -                             iscsi_get_error(iscsi));
> -                aio_timer_init(iTask->iscsilun->aio_context,
> -                               &iTask->retry_timer, QEMU_CLOCK_REALTIME,
> -                               SCALE_MS, iscsi_retry_timer_expired, iTask);
> -                timer_mod(&iTask->retry_timer,
> -                          qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
> -                iTask->do_retry = 1;
> -                return;
> -            } else if (status == SCSI_STATUS_CHECK_CONDITION) {
> -                int error = iscsi_translate_sense(&task->sense);
> -                if (error == EAGAIN) {
> -                    error_report("iSCSI CheckCondition: %s",
> +        if (iTask->hard_timed_out) {
> +            iTask->err_code = -ETIMEDOUT;
> +            iTask->err_str = g_strdup("iSCSI hard timeout");
> +            iTask->do_retry = 0;
> +        } else {
> +            iTask->err_code = -EIO;
> +            if (iTask->retries++ < ISCSI_CMD_RETRIES) {
> +                if (status == SCSI_STATUS_BUSY ||
> +                    status == SCSI_STATUS_TIMEOUT ||
> +                    status == SCSI_STATUS_TASK_SET_FULL) {
> +                    unsigned retry_time =
> +                        exp_random(iscsi_retry_times[iTask->retries - 1]);
> +                    if (status == SCSI_STATUS_TIMEOUT) {
> +                        /*
> +                         * make sure the request is rescheduled AFTER the
> +                         * reconnect is initiated
> +                         */
> +                        retry_time = EVENT_INTERVAL * 2;
> +                        iTask->iscsilun->request_timed_out = true;
> +                    }
> +                    error_report("iSCSI Busy/TaskSetFull/TimeOut"
> +                                 " (retry #%u in %u ms): %s",
> +                                 iTask->retries, retry_time,
>                                   iscsi_get_error(iscsi));
> +                    aio_timer_init(iTask->iscsilun->aio_context,
> +                                   &iTask->retry_timer, QEMU_CLOCK_REALTIME,
> +                                   SCALE_MS, iscsi_retry_timer_expired, iTask);
> +                    timer_mod(&iTask->retry_timer,
> +                              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
>                      iTask->do_retry = 1;
> +                    return;
> +                } else if (status == SCSI_STATUS_CHECK_CONDITION) {
> +                    int error = iscsi_translate_sense(&task->sense);
> +                    if (error == EAGAIN) {
> +                        error_report("iSCSI CheckCondition: %s",
> +                                     iscsi_get_error(iscsi));
> +                        iTask->do_retry = 1;
> +                    } else {
> +                        iTask->err_code = -error;
> +                        iTask->err_str = g_strdup(iscsi_get_error(iscsi));
> +                    }
>                  } else {
> -                    iTask->err_code = -error;
> +                    if (!iTask->err_str) {
> +                        iTask->err_str = g_strdup(iscsi_get_error(iscsi));
> +                    }
> +                }
> +            } else {
> +                if (!iTask->err_str) {
>                      iTask->err_str = g_strdup(iscsi_get_error(iscsi));
>                  }
>              }
>          }
>      }
> -
>      if (iTask->co) {
>          replay_bh_schedule_oneshot_event(iTask->iscsilun->aio_context,
>                                           iscsi_co_generic_bh_cb, iTask);
>      } else {
>          iTask->complete = 1;
> +        if (iTask->task) {
> +            scsi_free_scsi_task(iTask->task);
> +            iTask->task = NULL;
> +        }
> +        g_free(iTask->err_str);
> +        g_free(iTask);
>      }
> +
>  }
>
>  static void coroutine_fn
>  iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
>  {
>      *iTask = (struct IscsiTask) {
> -        .co         = qemu_coroutine_self(),
> -        .iscsilun   = iscsilun,
> +        .co              = qemu_coroutine_self(),
> +        .iscsilun        = iscsilun,
> +        .deadline_armed  = false,
> +        .hard_timed_out  = false,
> +        .first_submit_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME),
> +        .on_list         = false,
>      };
>  }
>
> +static IscsiTask * coroutine_fn iscsi_task_new(IscsiLun *iscsilun)
> +{
> +    IscsiTask *iTask = g_new0(IscsiTask, 1);
> +    iTask->co              = qemu_coroutine_self();
> +    iTask->iscsilun        = iscsilun;
> +    iTask->deadline_armed  = false;
> +    iTask->hard_timed_out  = false;
> +    iTask->first_submit_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> +    iTask->on_list         = false;
> +    return iTask;
> +}
> +
>  #ifdef __linux__
>
>  /* Called (via iscsi_service) with QemuMutex held. */
> @@ -371,17 +458,85 @@ iscsi_set_events(IscsiLun *iscsilun)
>      }
>  }
>
> +/* Try to (re)connect, but throttle to avoid storms. */
> +static void iscsi_maybe_reconnect(IscsiLun *iscsilun)
> +{
> +    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> +    if (now < iscsilun->next_reconnect_ms) {
> +        return;
> +    }
> +    iscsi_reconnect(iscsilun->iscsi);
> +    iscsilun->next_reconnect_ms = now + 2000; /* 2s throttle */
> +    /* After changing connection state, refresh event mask immediately. */
> +    iscsi_set_events(iscsilun);
> +}
> +
> +static void iscsi_deadline_timer_expired(void *opaque)
> +{
> +    struct IscsiTask *iTask = opaque;
> +    IscsiLun *iscsilun = iTask->iscsilun;
> +
> +    if (!iTask->deadline_armed) {
> +        return;
> +    }
> +    iTask->deadline_armed = false;
> +    iTask->hard_timed_out = true;
> +    iTask->status   = SCSI_STATUS_TIMEOUT;
> +    iTask->err_code = -ETIMEDOUT;
> +
> +    if (iscsilun) {
> +        qemu_mutex_lock(&iscsilun->mutex);
> +        if (iTask->task) {
> +            iscsi_scsi_cancel_task(iscsilun->iscsi, iTask->task);
> +            if (iTask->co) {
> +                replay_bh_schedule_oneshot_event(iscsilun->aio_context,
> +                                                 iscsi_co_generic_bh_cb, iTask);
> +            }
> +            iscsi_set_events(iscsilun);
> +        }
> +        qemu_mutex_unlock(&iscsilun->mutex);
> +    }
> +}
> +
> +static inline void iscsi_arm_deadline(struct IscsiTask *iTask)
> +{
> +    IscsiLun *iscsilun = iTask->iscsilun;
> +
> +    if (!iscsilun->io_hard_timeout_ms || iTask->deadline_armed) {
> +        return;
> +    }
> +    aio_timer_init(iscsilun->aio_context, &iTask->deadline_timer,
> +                   QEMU_CLOCK_REALTIME, SCALE_MS,
> +                   iscsi_deadline_timer_expired, iTask);
> +    timer_mod(&iTask->deadline_timer,
> +              iTask->first_submit_ms + iscsilun->io_hard_timeout_ms);
> +    iTask->deadline_armed = true;
> +}
> +
>  static void iscsi_timed_check_events(void *opaque)
>  {
>      IscsiLun *iscsilun = opaque;
>
>      WITH_QEMU_LOCK_GUARD(&iscsilun->mutex) {
> +        bool logged_in_before = iscsilun->last_logged_in;
> +        bool logged_in_now;
>          /* check for timed out requests */
>          iscsi_service(iscsilun->iscsi, 0);
> +        logged_in_now = iscsi_is_logged_in(iscsilun->iscsi);
> +        if (logged_in_before != logged_in_now) {
> +            iscsilun->last_logged_in = logged_in_now;
> +            if (logged_in_before && !logged_in_now && iscsilun->fail_fast) {
> +                iscsi_fail_inflight(iscsilun, -ENOTCONN);
> +            }
> +        }
>
>          if (iscsilun->request_timed_out) {
>              iscsilun->request_timed_out = false;
> -            iscsi_reconnect(iscsilun->iscsi);
> +            iscsi_maybe_reconnect(iscsilun);
> +        }
> +
> +        if (!logged_in_now) {
> +            iscsi_maybe_reconnect(iscsilun);
>          }
>
>          /*
> @@ -605,7 +760,7 @@ iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
>                  QEMUIOVector *iov, int flags)
>  {
>      IscsiLun *iscsilun = bs->opaque;
> -    struct IscsiTask iTask;
> +    IscsiTask *iTask;
>      uint64_t lba;
>      uint32_t num_sectors;
>      bool fua = flags & BDRV_REQ_FUA;
> @@ -624,21 +779,27 @@ iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
>
>      lba = sector_qemu2lun(sector_num, iscsilun);
>      num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
> -    iscsi_co_init_iscsitask(iscsilun, &iTask);
> +    iTask = iscsi_task_new(iscsilun);
>      qemu_mutex_lock(&iscsilun->mutex);
> +    if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) {
> +        qemu_mutex_unlock(&iscsilun->mutex);
> +        g_free(iTask->err_str);
> +        g_free(iTask);
> +        return -ENOTCONN;
> +    }
>  retry:
>      if (iscsilun->use_16_for_rw) {
>  #if LIBISCSI_API_VERSION >= (20160603)
> -        iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
> +        iTask->task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
>                                              NULL, num_sectors * iscsilun->block_size,
>                                              iscsilun->block_size, 0, 0, fua, 0, 0,
> -                                            iscsi_co_generic_cb, &iTask,
> +                                            iscsi_co_generic_cb, iTask,
>                                              (struct scsi_iovec *)iov->iov, iov->niov);
>      } else {
> -        iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
> +        iTask->task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
>                                              NULL, num_sectors * iscsilun->block_size,
>                                              iscsilun->block_size, 0, 0, fua, 0, 0,
> -                                            iscsi_co_generic_cb, &iTask,
> +                                            iscsi_co_generic_cb, iTask,
>                                              (struct scsi_iovec *)iov->iov, iov->niov);
>      }
>  #else
> @@ -653,41 +814,62 @@ retry:
>                                          iscsi_co_generic_cb, &iTask);
>      }
>  #endif
> -    if (iTask.task == NULL) {
> +    if (iTask->task == NULL) {
>          qemu_mutex_unlock(&iscsilun->mutex);
> +        g_free(iTask);
>          return -ENOMEM;
>      }
>  #if LIBISCSI_API_VERSION < (20160603)
>      scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
>                            iov->niov);
>  #endif
> -    iscsi_co_wait_for_task(&iTask, iscsilun);
> +    iscsi_set_events(iscsilun);
> +    if (!iTask->on_list) {
> +        QTAILQ_INSERT_TAIL(&iscsilun->inflight, iTask, entry);
> +        iTask->on_list = true;
> +    }
> +    iscsi_arm_deadline(iTask);
> +    iscsi_co_wait_for_task(iTask, iscsilun);
>
> -    if (iTask.task != NULL) {
> -        scsi_free_scsi_task(iTask.task);
> -        iTask.task = NULL;
> +    if (!iTask->hard_timed_out && iTask->task != NULL) {
> +        scsi_free_scsi_task(iTask->task);
> +        iTask->task = NULL;
>      }
>
> -    if (iTask.do_retry) {
> -        iTask.complete = 0;
> +    if (iTask->do_retry) {
> +        iTask->complete = 0;
>          goto retry;
>      }
>
> -    if (iTask.status != SCSI_STATUS_GOOD) {
> +    if (iTask->hard_timed_out) {
> +        r = iTask->err_code ? iTask->err_code : -ETIMEDOUT;
> +        iTask->co = NULL;
> +        if (iTask->on_list) {
> +            QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
> +            iTask->on_list = false;
> +        }
> +        qemu_mutex_unlock(&iscsilun->mutex);
> +        return r;
> +    }
> +
> +    if (iTask->status != SCSI_STATUS_GOOD) {
>          iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
>                                     nb_sectors * BDRV_SECTOR_SIZE);
>          error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba,
> -                     iTask.err_str);
> -        r = iTask.err_code;
> -        goto out_unlock;
> +                     iTask->err_str);
> +        r = iTask->err_code;
> +    } else {
> +        iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
> +                                     nb_sectors * BDRV_SECTOR_SIZE);
>      }
>
> -    iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
> -                                 nb_sectors * BDRV_SECTOR_SIZE);
> -
> -out_unlock:
> +    if (iTask->on_list) {
> +        QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
> +        iTask->on_list = false;
> +    }
>      qemu_mutex_unlock(&iscsilun->mutex);
> -    g_free(iTask.err_str);
> +    g_free(iTask->err_str);
> +    g_free(iTask);
>      return r;
>  }
>
> @@ -733,6 +915,8 @@ retry:
>          ret = -ENOMEM;
>          goto out_unlock;
>      }
> +    iscsi_arm_deadline(&iTask);
> +    iscsi_set_events(iscsilun);
>      iscsi_co_wait_for_task(&iTask, iscsilun);
>
>      if (iTask.do_retry) {
> @@ -801,7 +985,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
>                                         QEMUIOVector *iov)
>  {
>      IscsiLun *iscsilun = bs->opaque;
> -    struct IscsiTask iTask;
> +    IscsiTask *iTask;
>      uint64_t lba;
>      uint32_t num_sectors;
>      int r = 0;
> @@ -856,22 +1040,28 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
>      lba = sector_qemu2lun(sector_num, iscsilun);
>      num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
>
> -    iscsi_co_init_iscsitask(iscsilun, &iTask);
> +    iTask = iscsi_task_new(iscsilun);
>      qemu_mutex_lock(&iscsilun->mutex);
> +    if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) {
> +        qemu_mutex_unlock(&iscsilun->mutex);
> +        g_free(iTask->err_str);
> +        g_free(iTask);
> +        return -ENOTCONN;
> +    }
>  retry:
>      if (iscsilun->use_16_for_rw) {
>  #if LIBISCSI_API_VERSION >= (20160603)
> -        iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
> +        iTask->task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
>                                             num_sectors * iscsilun->block_size,
>                                             iscsilun->block_size, 0, 0, 0, 0, 0,
> -                                           iscsi_co_generic_cb, &iTask,
> +                                           iscsi_co_generic_cb, iTask,
>                                             (struct scsi_iovec *)iov->iov, iov->niov);
>      } else {
> -        iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
> +        iTask->task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
>                                             num_sectors * iscsilun->block_size,
>                                             iscsilun->block_size,
>                                             0, 0, 0, 0, 0,
> -                                           iscsi_co_generic_cb, &iTask,
> +                                           iscsi_co_generic_cb, iTask,
>                                             (struct scsi_iovec *)iov->iov, iov->niov);
>      }
>  #else
> @@ -887,70 +1077,119 @@ retry:
>                                         iscsi_co_generic_cb, &iTask);
>      }
>  #endif
> -    if (iTask.task == NULL) {
> +    if (iTask->task == NULL) {
>          qemu_mutex_unlock(&iscsilun->mutex);
> +        g_free(iTask);
>          return -ENOMEM;
>      }
>  #if LIBISCSI_API_VERSION < (20160603)
>      scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
>  #endif
> -
> -    iscsi_co_wait_for_task(&iTask, iscsilun);
> -    if (iTask.task != NULL) {
> -        scsi_free_scsi_task(iTask.task);
> -        iTask.task = NULL;
> +    if (!iTask->on_list) {
> +        QTAILQ_INSERT_TAIL(&iscsilun->inflight, iTask, entry);
> +        iTask->on_list = true;
> +    }
> +    iscsi_arm_deadline(iTask);
> +    iscsi_co_wait_for_task(iTask, iscsilun);
> +    if (!iTask->hard_timed_out && iTask->task != NULL) {
> +        scsi_free_scsi_task(iTask->task);
> +        iTask->task = NULL;
>      }
>
> -    if (iTask.do_retry) {
> -        iTask.complete = 0;
> +    if (iTask->do_retry) {
> +        iTask->complete = 0;
>          goto retry;
>      }
>
> -    if (iTask.status != SCSI_STATUS_GOOD) {
> +    if (iTask->hard_timed_out) {
> +        r = iTask->err_code ? iTask->err_code : -ETIMEDOUT;
> +        iTask->co = NULL;
> +        if (iTask->on_list) {
> +            QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
> +            iTask->on_list = false;
> +        }
> +        qemu_mutex_unlock(&iscsilun->mutex);
> +        return r;
> +    }
> +
> +    if (iTask->status != SCSI_STATUS_GOOD) {
>          error_report("iSCSI READ10/16 failed at lba %" PRIu64 ": %s",
> -                     lba, iTask.err_str);
> -        r = iTask.err_code;
> +                     lba, iTask->err_str);
> +        r = iTask->err_code;
>      }
>
> +    if (iTask->on_list) {
> +        QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
> +        iTask->on_list = false;
> +    }
>      qemu_mutex_unlock(&iscsilun->mutex);
> -    g_free(iTask.err_str);
> +    g_free(iTask->err_str);
> +    g_free(iTask);
>      return r;
>  }
>
>  static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
>  {
>      IscsiLun *iscsilun = bs->opaque;
> -    struct IscsiTask iTask;
> +    IscsiTask *iTask;
>      int r = 0;
>
> -    iscsi_co_init_iscsitask(iscsilun, &iTask);
> +    iTask = iscsi_task_new(iscsilun);
>      qemu_mutex_lock(&iscsilun->mutex);
> +    if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) {
> +        qemu_mutex_unlock(&iscsilun->mutex);
> +        g_free(iTask->err_str);
> +        g_free(iTask);
> +        return -ENOTCONN;
> +    }
>  retry:
>      if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
> -                                      0, iscsi_co_generic_cb, &iTask) == NULL) {
> +                                      0, iscsi_co_generic_cb, iTask) == NULL) {
>          qemu_mutex_unlock(&iscsilun->mutex);
> +        g_free(iTask);
>          return -ENOMEM;
>      }
> +    iscsi_set_events(iscsilun);
> +    if (!iTask->on_list) {
> +        QTAILQ_INSERT_TAIL(&iscsilun->inflight, iTask, entry);
> +        iTask->on_list = true;
> +    }
> +    iscsi_arm_deadline(iTask);
> +    iscsi_co_wait_for_task(iTask, iscsilun);
>
> -    iscsi_co_wait_for_task(&iTask, iscsilun);
> -
> -    if (iTask.task != NULL) {
> -        scsi_free_scsi_task(iTask.task);
> -        iTask.task = NULL;
> +    if (!iTask->hard_timed_out && iTask->task != NULL) {
> +        scsi_free_scsi_task(iTask->task);
> +        iTask->task = NULL;
>      }
>
> -    if (iTask.do_retry) {
> -        iTask.complete = 0;
> +    if (iTask->do_retry) {
> +        iTask->complete = 0;
>          goto retry;
>      }
>
> -    if (iTask.status != SCSI_STATUS_GOOD) {
> -        error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask.err_str);
> -        r = iTask.err_code;
> +    if (iTask->hard_timed_out) {
> +        r = iTask->err_code ? iTask->err_code : -ETIMEDOUT;
> +        iTask->co = NULL; /* detach */
> +        if (iTask->on_list) {
> +            QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
> +            iTask->on_list = false;
> +        }
> +        qemu_mutex_unlock(&iscsilun->mutex);
> +        return r;
>      }
>
> +    if (iTask->status != SCSI_STATUS_GOOD) {
> +        error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask->err_str);
> +        r = iTask->err_code;
> +    }
> +
> +    if (iTask->on_list) {
> +        QTAILQ_REMOVE(&iscsilun->inflight, iTask, entry);
> +        iTask->on_list = false;
> +    }
>      qemu_mutex_unlock(&iscsilun->mutex);
> -    g_free(iTask.err_str);
> +    g_free(iTask->err_str);
> +    g_free(iTask);
>      return r;
>  }
>
> @@ -1086,6 +1325,12 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
>
>      data.size = 0;
>      qemu_mutex_lock(&iscsilun->mutex);
> +    if (iscsilun->fail_fast && !iscsi_is_logged_in(iscsilun->iscsi)) {
> +        qemu_mutex_unlock(&iscsilun->mutex);
> +        acb->status = -ENOTCONN;
> +        iscsi_schedule_bh(acb);
> +        return &acb->common;
> +    }
>      if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
>          if (acb->ioh->iovec_count == 0) {
>              data.data = acb->ioh->dxferp;
> @@ -1176,6 +1421,7 @@ retry:
>          goto out_unlock;
>      }
>
> +    iscsi_set_events(iscsilun);
>      iscsi_co_wait_for_task(&iTask, iscsilun);
>
>      if (iTask.task != NULL) {
> @@ -1282,6 +1528,7 @@ retry:
>          return -ENOMEM;
>      }
>
> +    iscsi_set_events(iscsilun);
>      iscsi_co_wait_for_task(&iTask, iscsilun);
>
>      if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
> @@ -1415,14 +1662,24 @@ static void iscsi_nop_timed_event(void *opaque)
>      IscsiLun *iscsilun = opaque;
>
>      QEMU_LOCK_GUARD(&iscsilun->mutex);
> +    /* If we are not logged in, use the nop timer as an additional reconnect driver. */
> +    if (!iscsi_is_logged_in(iscsilun->iscsi)) {
> +        iscsilun->request_timed_out = true;
> +        iscsi_maybe_reconnect(iscsilun);
> +        goto rearm;
> +    }
>      if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
>          error_report("iSCSI: NOP timeout. Reconnecting...");
>          iscsilun->request_timed_out = true;
>      } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
> -        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
> -        return;
> +        /* Do NOT disable NOPs; treat as connection problem and try to reconnect. */
> +        error_report("iSCSI: failed to send NOP-Out. Triggering reconnect.");
> +        iscsilun->request_timed_out = true;
> +        iscsi_maybe_reconnect(iscsilun);
> +        /* keep NOPs enabled; next tick will try again */
>      }
>
> +rearm:
>      timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
>      iscsi_set_events(iscsilun);
>  }
> @@ -1559,6 +1816,8 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
>      IscsiLun *iscsilun = bs->opaque;
>
>      iscsilun->aio_context = new_context;
> +    iscsilun->next_reconnect_ms = 0;
> +    iscsilun->last_logged_in = iscsi_is_logged_in(iscsilun->iscsi);
>      iscsi_set_events(iscsilun);
>
>      /* Set up a timer for sending out iSCSI NOPs */
> @@ -1894,6 +2153,9 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
>          warn_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
>      }
>  #endif
> +    /* FORCE-ON policy: 5s hard timeout */
> +    iscsilun->io_hard_timeout_ms = 5000; /* 5 seconds */
> +    iscsilun->fail_fast = true;
>
>      if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
>          error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
> @@ -1905,6 +2167,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
>      iscsilun->iscsi = iscsi;
>      iscsilun->aio_context = bdrv_get_aio_context(bs);
>      iscsilun->lun = lun;
> +    iscsilun->next_reconnect_ms = 0;
> +    iscsilun->last_logged_in = false; /* updated after connect */
>      iscsilun->has_write_same = true;
>
>      task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
> @@ -2007,6 +2271,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
>
>      qemu_mutex_init(&iscsilun->mutex);
>      iscsi_attach_aio_context(bs, iscsilun->aio_context);
> +    iscsilun->last_logged_in = iscsi_is_logged_in(iscsilun->iscsi);
> +    QTAILQ_INIT(&iscsilun->inflight);
>
>      /* Guess the internal cluster (page) size of the iscsi target by the means
>       * of opt_unmap_gran. Transfer the unmap granularity only if it has a
> @@ -2387,6 +2653,7 @@ retry:
>          goto out_unlock;
>      }
>
> +    iscsi_set_events(dst_lun);
>      iscsi_co_wait_for_task(&iscsi_task, dst_lun);
>
>      if (iscsi_task.do_retry) {
> --
> 2.25.1
>