nvmet-fc blktests & autoconnect fixes

[RFC v1 3/4] nvmet-fc: untangle cross refcounting objects

Posted by Daniel Wagner 2 years, 5 months ago

Associations take a refcount on queues, queues take a refcount on
associations.

The existing code lead to the situation that the target executes a
disconnect and the host retriggers a reconnect immediately. The
reconnect command still finds an existing association and uses this.
Though the reconnect crashes later on because
nvmet_fc_delete_target_assoc() blindly goes ahead and removes resources
while the reconnect code wants to use it. The problem is that
nvmet_fc_find_target_assoc() is able to lookup an association which is
beeing removed.

So the first thing to address nvmet_fc_find_target_queue() is to remove
the association out of the list and wait a RCU cycle and free resources
in the free function callback of the kref_put().

The live time of the queues are strictly bound to the lifetime of an
association. Thus we don't need to take reverse refcounts (queue ->
assocation).

Furthermore, streamline the cleanup code by using the workqueue for
delete the association in nvmet_fc_ls_disconnect. This ensures, that we
run throught the same shutdown path in all non error cases.

Reproducer: nvme/003

Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 drivers/nvme/target/fc.c | 67 ++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index df7d84aff843..9d7262a8e3db 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -165,6 +165,7 @@ struct nvmet_fc_tgt_assoc {
 	struct nvmet_fc_hostport	*hostport;
 	struct nvmet_fc_ls_iod		*rcv_disconn;
 	struct list_head		a_list;
+	struct nvmet_fc_tgt_queue	*_queues[NVMET_NR_QUEUES + 1];
 	struct nvmet_fc_tgt_queue __rcu	*queues[NVMET_NR_QUEUES + 1];
 	struct kref			ref;
 	struct work_struct		del_work;
@@ -802,14 +803,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 	if (!queue)
 		return NULL;
 
-	if (!nvmet_fc_tgt_a_get(assoc))
-		goto out_free_queue;
-
 	queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0,
 				assoc->tgtport->fc_target_port.port_num,
 				assoc->a_id, qid);
 	if (!queue->work_q)
-		goto out_a_put;
+		goto out_free_queue;
 
 	queue->qid = qid;
 	queue->sqsize = sqsize;
@@ -830,7 +828,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 	if (ret)
 		goto out_fail_iodlist;
 
-	WARN_ON(assoc->queues[qid]);
+	WARN_ON(assoc->_queues[qid]);
+	assoc->_queues[qid] = queue;
 	rcu_assign_pointer(assoc->queues[qid], queue);
 
 	return queue;
@@ -838,8 +837,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 out_fail_iodlist:
 	nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue);
 	destroy_workqueue(queue->work_q);
-out_a_put:
-	nvmet_fc_tgt_a_put(assoc);
 out_free_queue:
 	kfree(queue);
 	return NULL;
@@ -852,12 +849,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref)
 	struct nvmet_fc_tgt_queue *queue =
 		container_of(ref, struct nvmet_fc_tgt_queue, ref);
 
-	rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL);
-
 	nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue);
 
-	nvmet_fc_tgt_a_put(queue->assoc);
-
 	destroy_workqueue(queue->work_q);
 
 	kfree_rcu(queue, rcu);
@@ -1100,6 +1093,11 @@ nvmet_fc_delete_assoc(struct work_struct *work)
 		container_of(work, struct nvmet_fc_tgt_assoc, del_work);
 
 	nvmet_fc_delete_target_assoc(assoc);
+
+	/* release get taken in nvmet_fc_find_target_assoc */
+	nvmet_fc_tgt_a_put(assoc);
+
+	/* final reference from nvmet_fc_ls_create_association */
 	nvmet_fc_tgt_a_put(assoc);
 }
 
@@ -1172,13 +1170,18 @@ nvmet_fc_target_assoc_free(struct kref *ref)
 	struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
 	struct nvmet_fc_ls_iod	*oldls;
 	unsigned long flags;
+	int i;
+
+	for (i = NVMET_NR_QUEUES; i >= 0; i--) {
+		if (assoc->_queues[i])
+			nvmet_fc_delete_target_queue(assoc->_queues[i]);
+	}
 
 	/* Send Disconnect now that all i/o has completed */
 	nvmet_fc_xmt_disconnect_assoc(assoc);
 
 	nvmet_fc_free_hostport(assoc->hostport);
 	spin_lock_irqsave(&tgtport->lock, flags);
-	list_del_rcu(&assoc->a_list);
 	oldls = assoc->rcv_disconn;
 	spin_unlock_irqrestore(&tgtport->lock, flags);
 	/* if pending Rcv Disconnect Association LS, send rsp now */
@@ -1208,7 +1211,6 @@ static void
 nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
 {
 	struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
-	struct nvmet_fc_tgt_queue *queue;
 	int i, terminating;
 
 	terminating = atomic_xchg(&assoc->terminating, 1);
@@ -1217,29 +1219,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
 	if (terminating)
 		return;
 
+	/* prevent new I/Os entering the queues */
+	for (i = NVMET_NR_QUEUES; i >= 0; i--)
+		rcu_assign_pointer(assoc->queues[i], NULL);
+	list_del_rcu(&assoc->a_list);
+	synchronize_rcu();
 
+	/* ensure all in-flight I/Os have been processed */
 	for (i = NVMET_NR_QUEUES; i >= 0; i--) {
-		rcu_read_lock();
-		queue = rcu_dereference(assoc->queues[i]);
-		if (!queue) {
-			rcu_read_unlock();
-			continue;
-		}
-
-		if (!nvmet_fc_tgt_q_get(queue)) {
-			rcu_read_unlock();
-			continue;
-		}
-		rcu_read_unlock();
-		nvmet_fc_delete_target_queue(queue);
-		nvmet_fc_tgt_q_put(queue);
+		if (assoc->_queues[i])
+			flush_workqueue(assoc->_queues[i]->work_q);
 	}
 
 	dev_info(tgtport->dev,
 		"{%d:%d} Association deleted\n",
 		tgtport->fc_target_port.port_num, assoc->a_id);
-
-	nvmet_fc_tgt_a_put(assoc);
 }
 
 static struct nvmet_fc_tgt_assoc *
@@ -1497,6 +1491,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport)
 			nvmet_fc_tgt_a_put(assoc);
 	}
 	rcu_read_unlock();
+
+	flush_workqueue(nvmet_wq);
 }
 
 /**
@@ -1870,9 +1866,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
 				sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
 			FCNVME_LS_DISCONNECT_ASSOC);
 
-	/* release get taken in nvmet_fc_find_target_assoc */
-	nvmet_fc_tgt_a_put(assoc);
-
 	/*
 	 * The rules for LS response says the response cannot
 	 * go back until ABTS's have been sent for all outstanding
@@ -1887,8 +1880,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
 	assoc->rcv_disconn = iod;
 	spin_unlock_irqrestore(&tgtport->lock, flags);
 
-	nvmet_fc_delete_target_assoc(assoc);
-
 	if (oldls) {
 		dev_info(tgtport->dev,
 			"{%d:%d} Multiple Disconnect Association LS's "
@@ -1904,6 +1895,11 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
 		nvmet_fc_xmt_ls_rsp(tgtport, oldls);
 	}
 
+	if (!queue_work(nvmet_wq, &assoc->del_work)) {
+		/* already deleting - release local reference */
+		nvmet_fc_tgt_a_put(assoc);
+	}
+
 	return false;
 }
 
@@ -2933,6 +2929,9 @@ static int __init nvmet_fc_init_module(void)
 
 static void __exit nvmet_fc_exit_module(void)
 {
+	/* ensure any shutdown operation, e.g. delete ctrls have finished */
+	flush_workqueue(nvmet_wq);
+
 	/* sanity check - all lports should be removed */
 	if (!list_empty(&nvmet_fc_target_list))
 		pr_warn("%s: targetport list not empty\n", __func__);
-- 
2.41.0

Re: [RFC v1 3/4] nvmet-fc: untangle cross refcounting objects

Posted by Hannes Reinecke 2 years, 5 months ago

On 8/29/23 11:13, Daniel Wagner wrote:
> Associations take a refcount on queues, queues take a refcount on
> associations.
> 
> The existing code lead to the situation that the target executes a
> disconnect and the host retriggers a reconnect immediately. The
> reconnect command still finds an existing association and uses this.
> Though the reconnect crashes later on because
> nvmet_fc_delete_target_assoc() blindly goes ahead and removes resources
> while the reconnect code wants to use it. The problem is that
> nvmet_fc_find_target_assoc() is able to lookup an association which is
> beeing removed.
> 
> So the first thing to address nvmet_fc_find_target_queue() is to remove
> the association out of the list and wait a RCU cycle and free resources
> in the free function callback of the kref_put().
> 
> The live time of the queues are strictly bound to the lifetime of an
> association. Thus we don't need to take reverse refcounts (queue ->
> assocation).
> 
> Furthermore, streamline the cleanup code by using the workqueue for
> delete the association in nvmet_fc_ls_disconnect. This ensures, that we
> run throught the same shutdown path in all non error cases.
> 
> Reproducer: nvme/003
> 
> Signed-off-by: Daniel Wagner <dwagner@suse.de>
> ---
>   drivers/nvme/target/fc.c | 67 ++++++++++++++++++++--------------------
>   1 file changed, 33 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
> index df7d84aff843..9d7262a8e3db 100644
> --- a/drivers/nvme/target/fc.c
> +++ b/drivers/nvme/target/fc.c
> @@ -165,6 +165,7 @@ struct nvmet_fc_tgt_assoc {
>   	struct nvmet_fc_hostport	*hostport;
>   	struct nvmet_fc_ls_iod		*rcv_disconn;
>   	struct list_head		a_list;
> +	struct nvmet_fc_tgt_queue	*_queues[NVMET_NR_QUEUES + 1];
>   	struct nvmet_fc_tgt_queue __rcu	*queues[NVMET_NR_QUEUES + 1];
>   	struct kref			ref;
>   	struct work_struct		del_work;
> @@ -802,14 +803,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
>   	if (!queue)
>   		return NULL;
>   
> -	if (!nvmet_fc_tgt_a_get(assoc))
> -		goto out_free_queue;
> -
>   	queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0,
>   				assoc->tgtport->fc_target_port.port_num,
>   				assoc->a_id, qid);
>   	if (!queue->work_q)
> -		goto out_a_put;
> +		goto out_free_queue;
>   
>   	queue->qid = qid;
>   	queue->sqsize = sqsize;
> @@ -830,7 +828,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
>   	if (ret)
>   		goto out_fail_iodlist;
>   
> -	WARN_ON(assoc->queues[qid]);
> +	WARN_ON(assoc->_queues[qid]);
> +	assoc->_queues[qid] = queue;
>   	rcu_assign_pointer(assoc->queues[qid], queue);
>   
>   	return queue;
> @@ -838,8 +837,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
>   out_fail_iodlist:
>   	nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue);
>   	destroy_workqueue(queue->work_q);
> -out_a_put:
> -	nvmet_fc_tgt_a_put(assoc);
>   out_free_queue:
>   	kfree(queue);
>   	return NULL;
> @@ -852,12 +849,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref)
>   	struct nvmet_fc_tgt_queue *queue =
>   		container_of(ref, struct nvmet_fc_tgt_queue, ref);
>   
> -	rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL);
> -
>   	nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue);
>   
> -	nvmet_fc_tgt_a_put(queue->assoc);
> -
>   	destroy_workqueue(queue->work_q);
>   
>   	kfree_rcu(queue, rcu);
> @@ -1100,6 +1093,11 @@ nvmet_fc_delete_assoc(struct work_struct *work)
>   		container_of(work, struct nvmet_fc_tgt_assoc, del_work);
>   
>   	nvmet_fc_delete_target_assoc(assoc);
> +
> +	/* release get taken in nvmet_fc_find_target_assoc */
> +	nvmet_fc_tgt_a_put(assoc);
> +
> +	/* final reference from nvmet_fc_ls_create_association */
>   	nvmet_fc_tgt_a_put(assoc);
>   }
>   
That feels wrong. If we're having to do two put in a row it seems that
we're taking one reference too many here.
I would assume that each queue takes a reference on the association; 
isn't that the case here?

> @@ -1172,13 +1170,18 @@ nvmet_fc_target_assoc_free(struct kref *ref)
>   	struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
>   	struct nvmet_fc_ls_iod	*oldls;
>   	unsigned long flags;
> +	int i;
> +
> +	for (i = NVMET_NR_QUEUES; i >= 0; i--) {
> +		if (assoc->_queues[i])
> +			nvmet_fc_delete_target_queue(assoc->_queues[i]);
> +	}
>   
>   	/* Send Disconnect now that all i/o has completed */
>   	nvmet_fc_xmt_disconnect_assoc(assoc);
>   
>   	nvmet_fc_free_hostport(assoc->hostport);
>   	spin_lock_irqsave(&tgtport->lock, flags);
> -	list_del_rcu(&assoc->a_list);
>   	oldls = assoc->rcv_disconn;
>   	spin_unlock_irqrestore(&tgtport->lock, flags);
>   	/* if pending Rcv Disconnect Association LS, send rsp now */
> @@ -1208,7 +1211,6 @@ static void
>   nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
>   {
>   	struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
> -	struct nvmet_fc_tgt_queue *queue;
>   	int i, terminating;
>   
>   	terminating = atomic_xchg(&assoc->terminating, 1);
> @@ -1217,29 +1219,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
>   	if (terminating)
>   		return;
>   
> +	/* prevent new I/Os entering the queues */
> +	for (i = NVMET_NR_QUEUES; i >= 0; i--)
> +		rcu_assign_pointer(assoc->queues[i], NULL);
> +	list_del_rcu(&assoc->a_list);
> +	synchronize_rcu();
>   
Watch out for 'list_del_rcu()'. That does _not_ modify the pointer for 
the element in question, only those from the list.
So to avoid concurrency with nvmet_fc_alloc_target_assoc() I guess we 
need to get the tgtport lock here.

> +	/* ensure all in-flight I/Os have been processed */
>   	for (i = NVMET_NR_QUEUES; i >= 0; i--) {
> -		rcu_read_lock();
> -		queue = rcu_dereference(assoc->queues[i]);
> -		if (!queue) {
> -			rcu_read_unlock();
> -			continue;
> -		}
> -
> -		if (!nvmet_fc_tgt_q_get(queue)) {
> -			rcu_read_unlock();
> -			continue;
> -		}
> -		rcu_read_unlock();
> -		nvmet_fc_delete_target_queue(queue);
> -		nvmet_fc_tgt_q_put(queue);
> +		if (assoc->_queues[i])
> +			flush_workqueue(assoc->_queues[i]->work_q);
>   	}
>   
>   	dev_info(tgtport->dev,
>   		"{%d:%d} Association deleted\n",
>   		tgtport->fc_target_port.port_num, assoc->a_id);
> -
> -	nvmet_fc_tgt_a_put(assoc);
>   }
>   
>   static struct nvmet_fc_tgt_assoc *
> @@ -1497,6 +1491,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport)
>   			nvmet_fc_tgt_a_put(assoc);
>   	}
>   	rcu_read_unlock();
> +
> +	flush_workqueue(nvmet_wq);
>   }
>   
>   /**
> @@ -1870,9 +1866,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
>   				sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
>   			FCNVME_LS_DISCONNECT_ASSOC);
>   
> -	/* release get taken in nvmet_fc_find_target_assoc */
> -	nvmet_fc_tgt_a_put(assoc);
> -
>   	/*
>   	 * The rules for LS response says the response cannot
>   	 * go back until ABTS's have been sent for all outstanding
> @@ -1887,8 +1880,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
>   	assoc->rcv_disconn = iod;
>   	spin_unlock_irqrestore(&tgtport->lock, flags);
>   
> -	nvmet_fc_delete_target_assoc(assoc);
> -
>   	if (oldls) {
>   		dev_info(tgtport->dev,
>   			"{%d:%d} Multiple Disconnect Association LS's "
> @@ -1904,6 +1895,11 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
>   		nvmet_fc_xmt_ls_rsp(tgtport, oldls);
>   	}
>   
> +	if (!queue_work(nvmet_wq, &assoc->del_work)) {
> +		/* already deleting - release local reference */
> +		nvmet_fc_tgt_a_put(assoc);
> +	}
> +
>   	return false;
>   }
>   
> @@ -2933,6 +2929,9 @@ static int __init nvmet_fc_init_module(void)
>   
>   static void __exit nvmet_fc_exit_module(void)
>   {
> +	/* ensure any shutdown operation, e.g. delete ctrls have finished */
> +	flush_workqueue(nvmet_wq);
> +
>   	/* sanity check - all lports should be removed */
>   	if (!list_empty(&nvmet_fc_target_list))
>   		pr_warn("%s: targetport list not empty\n", __func__);

Otherwise looks good.

Cheers,

Hannes

Re: [RFC v1 3/4] nvmet-fc: untangle cross refcounting objects

Posted by Daniel Wagner 2 years, 5 months ago

On Wed, Sep 06, 2023 at 01:22:28PM +0200, Hannes Reinecke wrote:
 >   	destroy_workqueue(queue->work_q);
> >   	kfree_rcu(queue, rcu);
> > @@ -1100,6 +1093,11 @@ nvmet_fc_delete_assoc(struct work_struct *work)
> >   		container_of(work, struct nvmet_fc_tgt_assoc, del_work);
> >   	nvmet_fc_delete_target_assoc(assoc);
> > +
> > +	/* release get taken in nvmet_fc_find_target_assoc */
> > +	nvmet_fc_tgt_a_put(assoc);
> > +
> > +	/* final reference from nvmet_fc_ls_create_association */
> >   	nvmet_fc_tgt_a_put(assoc);
> >   }
> That feels wrong. If we're having to do two put in a row it seems that
> we're taking one reference too many here.

When the association is created the first reference is taken. This is
the one we want to release here. But as nvmet_fc_find_target_assoc
always takes a reference we have to drop that one too. One possibility
would be to introduce a lookup function which doesn't take the
reference.

 > +	/* prevent new I/Os entering the queues */
> > +	for (i = NVMET_NR_QUEUES; i >= 0; i--)
> > +		rcu_assign_pointer(assoc->queues[i], NULL);
> > +	list_del_rcu(&assoc->a_list);
> > +	synchronize_rcu();
> Watch out for 'list_del_rcu()'. That does _not_ modify the pointer for the
> element in question, only those from the list.
> So to avoid concurrency with nvmet_fc_alloc_target_assoc() I guess we need
> to get the tgtport lock here.

Yes, we need to protect from concurent write access obviously.

[RFC v1 1/4] nvmet-trace: avoid dereferencing pointer too early
[RFC v1 2/4] nvmet-trace: null terminate device name string correctly
[RFC v1 3/4] nvmet-fc: untangle cross refcounting objects
[RFC v1 4/4] nvmet-discovery: Do not use invalid port
[RFC v1 4/4] nvmet-discovery: do not use invalid port