Associations take a refcount on queues, queues take a refcount on
associations.
The existing code lead to the situation that the target executes a
disconnect and the host retriggers a reconnect immediately. The
reconnect command still finds an existing association and uses this.
Though the reconnect crashes later on because
nvmet_fc_delete_target_assoc() blindly goes ahead and removes resources
while the reconnect code wants to use it. The problem is that
nvmet_fc_find_target_assoc() is able to lookup an association which is
beeing removed.
So the first thing to address nvmet_fc_find_target_queue() is to remove
the association out of the list and wait a RCU cycle and free resources
in the free function callback of the kref_put().
The live time of the queues are strictly bound to the lifetime of an
association. Thus we don't need to take reverse refcounts (queue ->
assocation).
Furthermore, streamline the cleanup code by using the workqueue for
delete the association in nvmet_fc_ls_disconnect. This ensures, that we
run throught the same shutdown path in all non error cases.
Reproducer: nvme/003
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
drivers/nvme/target/fc.c | 67 ++++++++++++++++++++--------------------
1 file changed, 33 insertions(+), 34 deletions(-)
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index df7d84aff843..9d7262a8e3db 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -165,6 +165,7 @@ struct nvmet_fc_tgt_assoc {
struct nvmet_fc_hostport *hostport;
struct nvmet_fc_ls_iod *rcv_disconn;
struct list_head a_list;
+ struct nvmet_fc_tgt_queue *_queues[NVMET_NR_QUEUES + 1];
struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1];
struct kref ref;
struct work_struct del_work;
@@ -802,14 +803,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
if (!queue)
return NULL;
- if (!nvmet_fc_tgt_a_get(assoc))
- goto out_free_queue;
-
queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0,
assoc->tgtport->fc_target_port.port_num,
assoc->a_id, qid);
if (!queue->work_q)
- goto out_a_put;
+ goto out_free_queue;
queue->qid = qid;
queue->sqsize = sqsize;
@@ -830,7 +828,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
if (ret)
goto out_fail_iodlist;
- WARN_ON(assoc->queues[qid]);
+ WARN_ON(assoc->_queues[qid]);
+ assoc->_queues[qid] = queue;
rcu_assign_pointer(assoc->queues[qid], queue);
return queue;
@@ -838,8 +837,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
out_fail_iodlist:
nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue);
destroy_workqueue(queue->work_q);
-out_a_put:
- nvmet_fc_tgt_a_put(assoc);
out_free_queue:
kfree(queue);
return NULL;
@@ -852,12 +849,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref)
struct nvmet_fc_tgt_queue *queue =
container_of(ref, struct nvmet_fc_tgt_queue, ref);
- rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL);
-
nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue);
- nvmet_fc_tgt_a_put(queue->assoc);
-
destroy_workqueue(queue->work_q);
kfree_rcu(queue, rcu);
@@ -1100,6 +1093,11 @@ nvmet_fc_delete_assoc(struct work_struct *work)
container_of(work, struct nvmet_fc_tgt_assoc, del_work);
nvmet_fc_delete_target_assoc(assoc);
+
+ /* release get taken in nvmet_fc_find_target_assoc */
+ nvmet_fc_tgt_a_put(assoc);
+
+ /* final reference from nvmet_fc_ls_create_association */
nvmet_fc_tgt_a_put(assoc);
}
@@ -1172,13 +1170,18 @@ nvmet_fc_target_assoc_free(struct kref *ref)
struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
struct nvmet_fc_ls_iod *oldls;
unsigned long flags;
+ int i;
+
+ for (i = NVMET_NR_QUEUES; i >= 0; i--) {
+ if (assoc->_queues[i])
+ nvmet_fc_delete_target_queue(assoc->_queues[i]);
+ }
/* Send Disconnect now that all i/o has completed */
nvmet_fc_xmt_disconnect_assoc(assoc);
nvmet_fc_free_hostport(assoc->hostport);
spin_lock_irqsave(&tgtport->lock, flags);
- list_del_rcu(&assoc->a_list);
oldls = assoc->rcv_disconn;
spin_unlock_irqrestore(&tgtport->lock, flags);
/* if pending Rcv Disconnect Association LS, send rsp now */
@@ -1208,7 +1211,6 @@ static void
nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
{
struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
- struct nvmet_fc_tgt_queue *queue;
int i, terminating;
terminating = atomic_xchg(&assoc->terminating, 1);
@@ -1217,29 +1219,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
if (terminating)
return;
+ /* prevent new I/Os entering the queues */
+ for (i = NVMET_NR_QUEUES; i >= 0; i--)
+ rcu_assign_pointer(assoc->queues[i], NULL);
+ list_del_rcu(&assoc->a_list);
+ synchronize_rcu();
+ /* ensure all in-flight I/Os have been processed */
for (i = NVMET_NR_QUEUES; i >= 0; i--) {
- rcu_read_lock();
- queue = rcu_dereference(assoc->queues[i]);
- if (!queue) {
- rcu_read_unlock();
- continue;
- }
-
- if (!nvmet_fc_tgt_q_get(queue)) {
- rcu_read_unlock();
- continue;
- }
- rcu_read_unlock();
- nvmet_fc_delete_target_queue(queue);
- nvmet_fc_tgt_q_put(queue);
+ if (assoc->_queues[i])
+ flush_workqueue(assoc->_queues[i]->work_q);
}
dev_info(tgtport->dev,
"{%d:%d} Association deleted\n",
tgtport->fc_target_port.port_num, assoc->a_id);
-
- nvmet_fc_tgt_a_put(assoc);
}
static struct nvmet_fc_tgt_assoc *
@@ -1497,6 +1491,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport)
nvmet_fc_tgt_a_put(assoc);
}
rcu_read_unlock();
+
+ flush_workqueue(nvmet_wq);
}
/**
@@ -1870,9 +1866,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
FCNVME_LS_DISCONNECT_ASSOC);
- /* release get taken in nvmet_fc_find_target_assoc */
- nvmet_fc_tgt_a_put(assoc);
-
/*
* The rules for LS response says the response cannot
* go back until ABTS's have been sent for all outstanding
@@ -1887,8 +1880,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
assoc->rcv_disconn = iod;
spin_unlock_irqrestore(&tgtport->lock, flags);
- nvmet_fc_delete_target_assoc(assoc);
-
if (oldls) {
dev_info(tgtport->dev,
"{%d:%d} Multiple Disconnect Association LS's "
@@ -1904,6 +1895,11 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
nvmet_fc_xmt_ls_rsp(tgtport, oldls);
}
+ if (!queue_work(nvmet_wq, &assoc->del_work)) {
+ /* already deleting - release local reference */
+ nvmet_fc_tgt_a_put(assoc);
+ }
+
return false;
}
@@ -2933,6 +2929,9 @@ static int __init nvmet_fc_init_module(void)
static void __exit nvmet_fc_exit_module(void)
{
+ /* ensure any shutdown operation, e.g. delete ctrls have finished */
+ flush_workqueue(nvmet_wq);
+
/* sanity check - all lports should be removed */
if (!list_empty(&nvmet_fc_target_list))
pr_warn("%s: targetport list not empty\n", __func__);
--
2.41.0
On 8/29/23 11:13, Daniel Wagner wrote:
> Associations take a refcount on queues, queues take a refcount on
> associations.
>
> The existing code lead to the situation that the target executes a
> disconnect and the host retriggers a reconnect immediately. The
> reconnect command still finds an existing association and uses this.
> Though the reconnect crashes later on because
> nvmet_fc_delete_target_assoc() blindly goes ahead and removes resources
> while the reconnect code wants to use it. The problem is that
> nvmet_fc_find_target_assoc() is able to lookup an association which is
> beeing removed.
>
> So the first thing to address nvmet_fc_find_target_queue() is to remove
> the association out of the list and wait a RCU cycle and free resources
> in the free function callback of the kref_put().
>
> The live time of the queues are strictly bound to the lifetime of an
> association. Thus we don't need to take reverse refcounts (queue ->
> assocation).
>
> Furthermore, streamline the cleanup code by using the workqueue for
> delete the association in nvmet_fc_ls_disconnect. This ensures, that we
> run throught the same shutdown path in all non error cases.
>
> Reproducer: nvme/003
>
> Signed-off-by: Daniel Wagner <dwagner@suse.de>
> ---
> drivers/nvme/target/fc.c | 67 ++++++++++++++++++++--------------------
> 1 file changed, 33 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
> index df7d84aff843..9d7262a8e3db 100644
> --- a/drivers/nvme/target/fc.c
> +++ b/drivers/nvme/target/fc.c
> @@ -165,6 +165,7 @@ struct nvmet_fc_tgt_assoc {
> struct nvmet_fc_hostport *hostport;
> struct nvmet_fc_ls_iod *rcv_disconn;
> struct list_head a_list;
> + struct nvmet_fc_tgt_queue *_queues[NVMET_NR_QUEUES + 1];
> struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1];
> struct kref ref;
> struct work_struct del_work;
> @@ -802,14 +803,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
> if (!queue)
> return NULL;
>
> - if (!nvmet_fc_tgt_a_get(assoc))
> - goto out_free_queue;
> -
> queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0,
> assoc->tgtport->fc_target_port.port_num,
> assoc->a_id, qid);
> if (!queue->work_q)
> - goto out_a_put;
> + goto out_free_queue;
>
> queue->qid = qid;
> queue->sqsize = sqsize;
> @@ -830,7 +828,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
> if (ret)
> goto out_fail_iodlist;
>
> - WARN_ON(assoc->queues[qid]);
> + WARN_ON(assoc->_queues[qid]);
> + assoc->_queues[qid] = queue;
> rcu_assign_pointer(assoc->queues[qid], queue);
>
> return queue;
> @@ -838,8 +837,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
> out_fail_iodlist:
> nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue);
> destroy_workqueue(queue->work_q);
> -out_a_put:
> - nvmet_fc_tgt_a_put(assoc);
> out_free_queue:
> kfree(queue);
> return NULL;
> @@ -852,12 +849,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref)
> struct nvmet_fc_tgt_queue *queue =
> container_of(ref, struct nvmet_fc_tgt_queue, ref);
>
> - rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL);
> -
> nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue);
>
> - nvmet_fc_tgt_a_put(queue->assoc);
> -
> destroy_workqueue(queue->work_q);
>
> kfree_rcu(queue, rcu);
> @@ -1100,6 +1093,11 @@ nvmet_fc_delete_assoc(struct work_struct *work)
> container_of(work, struct nvmet_fc_tgt_assoc, del_work);
>
> nvmet_fc_delete_target_assoc(assoc);
> +
> + /* release get taken in nvmet_fc_find_target_assoc */
> + nvmet_fc_tgt_a_put(assoc);
> +
> + /* final reference from nvmet_fc_ls_create_association */
> nvmet_fc_tgt_a_put(assoc);
> }
>
That feels wrong. If we're having to do two put in a row it seems that
we're taking one reference too many here.
I would assume that each queue takes a reference on the association;
isn't that the case here?
> @@ -1172,13 +1170,18 @@ nvmet_fc_target_assoc_free(struct kref *ref)
> struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
> struct nvmet_fc_ls_iod *oldls;
> unsigned long flags;
> + int i;
> +
> + for (i = NVMET_NR_QUEUES; i >= 0; i--) {
> + if (assoc->_queues[i])
> + nvmet_fc_delete_target_queue(assoc->_queues[i]);
> + }
>
> /* Send Disconnect now that all i/o has completed */
> nvmet_fc_xmt_disconnect_assoc(assoc);
>
> nvmet_fc_free_hostport(assoc->hostport);
> spin_lock_irqsave(&tgtport->lock, flags);
> - list_del_rcu(&assoc->a_list);
> oldls = assoc->rcv_disconn;
> spin_unlock_irqrestore(&tgtport->lock, flags);
> /* if pending Rcv Disconnect Association LS, send rsp now */
> @@ -1208,7 +1211,6 @@ static void
> nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
> {
> struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
> - struct nvmet_fc_tgt_queue *queue;
> int i, terminating;
>
> terminating = atomic_xchg(&assoc->terminating, 1);
> @@ -1217,29 +1219,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
> if (terminating)
> return;
>
> + /* prevent new I/Os entering the queues */
> + for (i = NVMET_NR_QUEUES; i >= 0; i--)
> + rcu_assign_pointer(assoc->queues[i], NULL);
> + list_del_rcu(&assoc->a_list);
> + synchronize_rcu();
>
Watch out for 'list_del_rcu()'. That does _not_ modify the pointer for
the element in question, only those from the list.
So to avoid concurrency with nvmet_fc_alloc_target_assoc() I guess we
need to get the tgtport lock here.
> + /* ensure all in-flight I/Os have been processed */
> for (i = NVMET_NR_QUEUES; i >= 0; i--) {
> - rcu_read_lock();
> - queue = rcu_dereference(assoc->queues[i]);
> - if (!queue) {
> - rcu_read_unlock();
> - continue;
> - }
> -
> - if (!nvmet_fc_tgt_q_get(queue)) {
> - rcu_read_unlock();
> - continue;
> - }
> - rcu_read_unlock();
> - nvmet_fc_delete_target_queue(queue);
> - nvmet_fc_tgt_q_put(queue);
> + if (assoc->_queues[i])
> + flush_workqueue(assoc->_queues[i]->work_q);
> }
>
> dev_info(tgtport->dev,
> "{%d:%d} Association deleted\n",
> tgtport->fc_target_port.port_num, assoc->a_id);
> -
> - nvmet_fc_tgt_a_put(assoc);
> }
>
> static struct nvmet_fc_tgt_assoc *
> @@ -1497,6 +1491,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport)
> nvmet_fc_tgt_a_put(assoc);
> }
> rcu_read_unlock();
> +
> + flush_workqueue(nvmet_wq);
> }
>
> /**
> @@ -1870,9 +1866,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
> sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
> FCNVME_LS_DISCONNECT_ASSOC);
>
> - /* release get taken in nvmet_fc_find_target_assoc */
> - nvmet_fc_tgt_a_put(assoc);
> -
> /*
> * The rules for LS response says the response cannot
> * go back until ABTS's have been sent for all outstanding
> @@ -1887,8 +1880,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
> assoc->rcv_disconn = iod;
> spin_unlock_irqrestore(&tgtport->lock, flags);
>
> - nvmet_fc_delete_target_assoc(assoc);
> -
> if (oldls) {
> dev_info(tgtport->dev,
> "{%d:%d} Multiple Disconnect Association LS's "
> @@ -1904,6 +1895,11 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
> nvmet_fc_xmt_ls_rsp(tgtport, oldls);
> }
>
> + if (!queue_work(nvmet_wq, &assoc->del_work)) {
> + /* already deleting - release local reference */
> + nvmet_fc_tgt_a_put(assoc);
> + }
> +
> return false;
> }
>
> @@ -2933,6 +2929,9 @@ static int __init nvmet_fc_init_module(void)
>
> static void __exit nvmet_fc_exit_module(void)
> {
> + /* ensure any shutdown operation, e.g. delete ctrls have finished */
> + flush_workqueue(nvmet_wq);
> +
> /* sanity check - all lports should be removed */
> if (!list_empty(&nvmet_fc_target_list))
> pr_warn("%s: targetport list not empty\n", __func__);
Otherwise looks good.
Cheers,
Hannes
On Wed, Sep 06, 2023 at 01:22:28PM +0200, Hannes Reinecke wrote: > destroy_workqueue(queue->work_q); > > kfree_rcu(queue, rcu); > > @@ -1100,6 +1093,11 @@ nvmet_fc_delete_assoc(struct work_struct *work) > > container_of(work, struct nvmet_fc_tgt_assoc, del_work); > > nvmet_fc_delete_target_assoc(assoc); > > + > > + /* release get taken in nvmet_fc_find_target_assoc */ > > + nvmet_fc_tgt_a_put(assoc); > > + > > + /* final reference from nvmet_fc_ls_create_association */ > > nvmet_fc_tgt_a_put(assoc); > > } > That feels wrong. If we're having to do two put in a row it seems that > we're taking one reference too many here. When the association is created the first reference is taken. This is the one we want to release here. But as nvmet_fc_find_target_assoc always takes a reference we have to drop that one too. One possibility would be to introduce a lookup function which doesn't take the reference. > + /* prevent new I/Os entering the queues */ > > + for (i = NVMET_NR_QUEUES; i >= 0; i--) > > + rcu_assign_pointer(assoc->queues[i], NULL); > > + list_del_rcu(&assoc->a_list); > > + synchronize_rcu(); > Watch out for 'list_del_rcu()'. That does _not_ modify the pointer for the > element in question, only those from the list. > So to avoid concurrency with nvmet_fc_alloc_target_assoc() I guess we need > to get the tgtport lock here. Yes, we need to protect from concurent write access obviously.
© 2016 - 2025 Red Hat, Inc.