[PATCH 05/11] nvmet-fcloop: track tport with ref counting

Daniel Wagner posted 11 patches 11 months, 2 weeks ago
There is a newer version of this series
[PATCH 05/11] nvmet-fcloop: track tport with ref counting
Posted by Daniel Wagner 11 months, 2 weeks ago
The tport object is created via nvmet_fc_register_targetport and freed
via nvmet_fc_unregister_targetport. That means after the port is
unregistered nothing should use it. Thus, use reference counting to
ensure that no user is left before the unregister step.

Signed-off-by: Daniel Wagner <wagi@kernel.org>
---
 drivers/nvme/target/fcloop.c | 52 +++++++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 80693705c069dd114b2d4f15d0482dd2d713a273..2269b4d20af2ef9bb423617b94a5f5326ea124bd 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -233,8 +233,12 @@ struct fcloop_tport {
 	spinlock_t			lock;
 	struct list_head		ls_list;
 	struct work_struct		ls_work;
+	struct kref			ref;
 };
 
+static int fcloop_tport_get(struct fcloop_tport *tport);
+static void fcloop_tport_put(struct fcloop_tport *tport);
+
 struct fcloop_nport {
 	struct fcloop_rport *rport;
 	struct fcloop_tport *tport;
@@ -426,6 +430,7 @@ fcloop_tport_lsrqst_work(struct work_struct *work)
 		spin_lock(&tport->lock);
 	}
 	spin_unlock(&tport->lock);
+	fcloop_tport_put(tport);
 }
 
 static int
@@ -444,12 +449,16 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
 	tls_req->lsreq = lsreq;
 	INIT_LIST_HEAD(&tls_req->ls_list);
 
+	if (!tport)
+		return -ECONNABORTED;
+
 	if (!tport->remoteport) {
 		tls_req->status = -ECONNREFUSED;
 		spin_lock(&tport->lock);
 		list_add_tail(&tls_req->ls_list, &tport->ls_list);
 		spin_unlock(&tport->lock);
-		queue_work(nvmet_wq, &tport->ls_work);
+		if (queue_work(nvmet_wq, &tport->ls_work))
+			fcloop_tport_get(tport);
 		return ret;
 	}
 
@@ -481,7 +490,8 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
 		spin_lock(&tport->lock);
 		list_add_tail(&tport->ls_list, &tls_req->ls_list);
 		spin_unlock(&tport->lock);
-		queue_work(nvmet_wq, &tport->ls_work);
+		if (queue_work(nvmet_wq, &tport->ls_work))
+			fcloop_tport_get(tport);
 	}
 
 	return 0;
@@ -1496,6 +1506,8 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr,
 
 	/* success */
 	tport = targetport->private;
+	kref_init(&tport->ref);
+
 	tport->targetport = targetport;
 	tport->remoteport = (nport->rport) ?  nport->rport->remoteport : NULL;
 	if (nport->rport)
@@ -1526,21 +1538,30 @@ __unlink_target_port(struct fcloop_nport *nport)
 	return tport;
 }
 
-static int
-__targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
+static void
+fcloop_targetport_unreg(struct kref *ref)
 {
-	int ret;
+	struct fcloop_tport *tport =
+		container_of(ref, struct fcloop_tport, ref);
+	struct fcloop_nport *nport;
 
-	if (!tport) {
-		ret = -EALREADY;
-		goto out;
-	}
+	nport = tport->nport;
+	nvmet_fc_unregister_targetport(tport->targetport);
 
-	ret = nvmet_fc_unregister_targetport(tport->targetport);
-out:
 	/* nport ref put: targetport */
 	fcloop_nport_put(nport);
-	return ret;
+}
+
+static int
+fcloop_tport_get(struct fcloop_tport *tport)
+{
+	return kref_get_unless_zero(&tport->ref);
+}
+
+static void
+fcloop_tport_put(struct fcloop_tport *tport)
+{
+	kref_put(&tport->ref, fcloop_targetport_unreg);
 }
 
 static ssize_t
@@ -1576,8 +1597,7 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
 	if (!nport)
 		return -ENOENT;
 
-	ret = __targetport_unreg(nport, tport);
-
+	fcloop_tport_put(tport);
 	fcloop_nport_put(nport);
 
 	return ret ? ret : count;
@@ -1696,9 +1716,7 @@ static void __exit fcloop_exit(void)
 
 		spin_unlock_irqrestore(&fcloop_lock, flags);
 
-		ret = __targetport_unreg(nport, tport);
-		if (ret)
-			pr_warn("%s: Failed deleting target port\n", __func__);
+		fcloop_tport_put(tport);
 
 		ret = __remoteport_unreg(nport, rport);
 		if (ret)

-- 
2.48.1
Re: [PATCH 05/11] nvmet-fcloop: track tport with ref counting
Posted by Hannes Reinecke 11 months, 2 weeks ago
On 2/26/25 19:45, Daniel Wagner wrote:
> The tport object is created via nvmet_fc_register_targetport and freed
> via nvmet_fc_unregister_targetport. That means after the port is
> unregistered nothing should use it. Thus ensure with refcounting
> that there is no user left before the unregister step.
> 
> Signed-off-by: Daniel Wagner <wagi@kernel.org>
> ---
>   drivers/nvme/target/fcloop.c | 52 +++++++++++++++++++++++++++++---------------
>   1 file changed, 35 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
> index 80693705c069dd114b2d4f15d0482dd2d713a273..2269b4d20af2ef9bb423617b94a5f5326ea124bd 100644
> --- a/drivers/nvme/target/fcloop.c
> +++ b/drivers/nvme/target/fcloop.c
> @@ -233,8 +233,12 @@ struct fcloop_tport {
>   	spinlock_t			lock;
>   	struct list_head		ls_list;
>   	struct work_struct		ls_work;
> +	struct kref			ref;
>   };
>   
> +static int fcloop_tport_get(struct fcloop_tport *tport);
> +static void fcloop_tport_put(struct fcloop_tport *tport);
> +
>   struct fcloop_nport {
>   	struct fcloop_rport *rport;
>   	struct fcloop_tport *tport;
> @@ -426,6 +430,7 @@ fcloop_tport_lsrqst_work(struct work_struct *work)
>   		spin_lock(&tport->lock);
>   	}
>   	spin_unlock(&tport->lock);
> +	fcloop_tport_put(tport);
>   }
>   
>   static int
> @@ -444,12 +449,16 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
>   	tls_req->lsreq = lsreq;
>   	INIT_LIST_HEAD(&tls_req->ls_list);
>   
> +	if (!tport)
> +		return -ECONNABORTED;
> +
>   	if (!tport->remoteport) {
>   		tls_req->status = -ECONNREFUSED;
>   		spin_lock(&tport->lock);
>   		list_add_tail(&tls_req->ls_list, &tport->ls_list);
>   		spin_unlock(&tport->lock);
> -		queue_work(nvmet_wq, &tport->ls_work);
> +		if (queue_work(nvmet_wq, &tport->ls_work))
> +			fcloop_tport_get(tport);

Don't you need to remove the 'tls_req' from the list, too, seeing that
it'll never be transferred?

>   		return ret;
>   	}
>   
> @@ -481,7 +490,8 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
>   		spin_lock(&tport->lock);
>   		list_add_tail(&tport->ls_list, &tls_req->ls_list);
>   		spin_unlock(&tport->lock);
> -		queue_work(nvmet_wq, &tport->ls_work);
> +		if (queue_work(nvmet_wq, &tport->ls_work))
> +			fcloop_tport_get(tport);

Same argument here.

>   	}
>   
>   	return 0;
> @@ -1496,6 +1506,8 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr,
>   
>   	/* success */
>   	tport = targetport->private;
> +	kref_init(&tport->ref);
> +
>   	tport->targetport = targetport;
>   	tport->remoteport = (nport->rport) ?  nport->rport->remoteport : NULL;
>   	if (nport->rport)
> @@ -1526,21 +1538,30 @@ __unlink_target_port(struct fcloop_nport *nport)
>   	return tport;
>   }
>   
> -static int
> -__targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
> +static void
> +fcloop_targetport_unreg(struct kref *ref)
>   {
> -	int ret;
> +	struct fcloop_tport *tport =
> +		container_of(ref, struct fcloop_tport, ref);
> +	struct fcloop_nport *nport;
>   
> -	if (!tport) {
> -		ret = -EALREADY;
> -		goto out;
> -	}
> +	nport = tport->nport;
> +	nvmet_fc_unregister_targetport(tport->targetport);
>   
> -	ret = nvmet_fc_unregister_targetport(tport->targetport);
> -out:
>   	/* nport ref put: targetport */
>   	fcloop_nport_put(nport);
> -	return ret;
> +}
> +
> +static int
> +fcloop_tport_get(struct fcloop_tport *tport)
> +{
> +	return kref_get_unless_zero(&tport->ref);
> +}
> +
> +static void
> +fcloop_tport_put(struct fcloop_tport *tport)
> +{
> +	kref_put(&tport->ref, fcloop_targetport_unreg);
>   }
>   
>   static ssize_t
> @@ -1576,8 +1597,7 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
>   	if (!nport)
>   		return -ENOENT;
>   
> -	ret = __targetport_unreg(nport, tport);
> -
> +	fcloop_tport_put(tport);
>   	fcloop_nport_put(nport);
>   
Hmm. Are we sure that the 'tport' reference is always > 1 here?
Otherwise we'll end up with funny business when the tport is deleted
yet the nport still holds a reference ...

>   	return ret ? ret : count;
> @@ -1696,9 +1716,7 @@ static void __exit fcloop_exit(void)
>   
>   		spin_unlock_irqrestore(&fcloop_lock, flags);
>   
> -		ret = __targetport_unreg(nport, tport);
> -		if (ret)
> -			pr_warn("%s: Failed deleting target port\n", __func__);
> +		fcloop_tport_put(tport);
>   
>   		ret = __remoteport_unreg(nport, rport);
>   		if (ret)
> 

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
Re: [PATCH 05/11] nvmet-fcloop: track tport with ref counting
Posted by Daniel Wagner 11 months, 2 weeks ago
On Fri, Feb 28, 2025 at 08:27:40AM +0100, Hannes Reinecke wrote:
> On 2/26/25 19:45, Daniel Wagner wrote:
> > The tport object is created via nvmet_fc_register_targetport and freed
> > via nvmet_fc_unregister_targetport. That means after the port is
> > unregistered nothing should use it. Thus ensure with refcounting
> > that there is no user left before the unregister step.
> > 
> > Signed-off-by: Daniel Wagner <wagi@kernel.org>
> > ---
> >   drivers/nvme/target/fcloop.c | 52 +++++++++++++++++++++++++++++---------------
> >   1 file changed, 35 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
> > index 80693705c069dd114b2d4f15d0482dd2d713a273..2269b4d20af2ef9bb423617b94a5f5326ea124bd 100644
> > --- a/drivers/nvme/target/fcloop.c
> > +++ b/drivers/nvme/target/fcloop.c
> > @@ -233,8 +233,12 @@ struct fcloop_tport {
> >   	spinlock_t			lock;
> >   	struct list_head		ls_list;
> >   	struct work_struct		ls_work;
> > +	struct kref			ref;
> >   };
> > +static int fcloop_tport_get(struct fcloop_tport *tport);
> > +static void fcloop_tport_put(struct fcloop_tport *tport);
> > +
> >   struct fcloop_nport {
> >   	struct fcloop_rport *rport;
> >   	struct fcloop_tport *tport;
> > @@ -426,6 +430,7 @@ fcloop_tport_lsrqst_work(struct work_struct *work)
> >   		spin_lock(&tport->lock);
> >   	}
> >   	spin_unlock(&tport->lock);
> > +	fcloop_tport_put(tport);
> >   }
> >   static int
> > @@ -444,12 +449,16 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
> >   	tls_req->lsreq = lsreq;
> >   	INIT_LIST_HEAD(&tls_req->ls_list);
> > +	if (!tport)
> > +		return -ECONNABORTED;
> > +
> >   	if (!tport->remoteport) {
> >   		tls_req->status = -ECONNREFUSED;
> >   		spin_lock(&tport->lock);
> >   		list_add_tail(&tls_req->ls_list, &tport->ls_list);
> >   		spin_unlock(&tport->lock);
> > -		queue_work(nvmet_wq, &tport->ls_work);
> > +		if (queue_work(nvmet_wq, &tport->ls_work))
> > +			fcloop_tport_get(tport);
> 
> Don't you need to remove the 'tls_req' from the list, too, seeing that
> it'll never be transferred?

Good point. I'll add the error handling.

> > @@ -1576,8 +1597,7 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
> >   	if (!nport)
> >   		return -ENOENT;
> > -	ret = __targetport_unreg(nport, tport);
> > -
> > +	fcloop_tport_put(tport);
> >   	fcloop_nport_put(nport);
> Hmm. Are we sure that the 'tport' reference is always > 1 here? Otherwise
> we'll end up with a funny business when the tport is deleted
> yet the nport still has a reference ..

Yes, I am very sure about this. This doesn't mean it needs to stay like
this. The goal here is to avoid changing the existing structure of the
code and only annotate the life time of the different objects with ref
counters, so we get rid of the implicit 'rules' when it's safe to
access a pointer and when not.

Anyway, I really don't mind getting this sorted out eventually, but
please not in this series.
Re: [PATCH 05/11] nvmet-fcloop: track tport with ref counting
Posted by Daniel Wagner 11 months, 2 weeks ago
On Fri, Feb 28, 2025 at 09:30:42AM +0100, Daniel Wagner wrote:
> On Fri, Feb 28, 2025 at 08:27:40AM +0100, Hannes Reinecke wrote:
> > >   	if (!tport->remoteport) {
> > >   		tls_req->status = -ECONNREFUSED;
> > >   		spin_lock(&tport->lock);
> > >   		list_add_tail(&tls_req->ls_list, &tport->ls_list);
> > >   		spin_unlock(&tport->lock);
> > > -		queue_work(nvmet_wq, &tport->ls_work);
> > > +		if (queue_work(nvmet_wq, &tport->ls_work))
> > > +			fcloop_tport_get(tport);
> > 
> > Don't you need to remove the 'tls_req' from the list, too, seeing that
> > it'll never be transferred?
> 
> Good point. I'll add the error handling.

In fact, I think a WARN_ONCE is the better choice here, as the element
should not already be on the list (that would indicate a bug in the code),
and queue_work will only return false if the work item is already queued.