[PATCH] usb: gadget: f_ncm: Fix NPE in ncm_bind error path

Kuen-Han Tsai posted 1 patch 4 weeks ago
drivers/usb/gadget/function/f_ncm.c | 37 ++++++++++++++++-------------
1 file changed, 21 insertions(+), 16 deletions(-)
[PATCH] usb: gadget: f_ncm: Fix NPE in ncm_bind error path
Posted by Kuen-Han Tsai 4 weeks ago
When an ncm_bind/unbind cycle occurs, the ncm->notify_req pointer is
left pointing to a stale address. If a subsequent call to ncm_bind()
fails to allocate the endpoints, the function jumps to the unified error
label. The cleanup code sees the stale ncm->notify_req pointer and calls
usb_ep_free_request().

This results in a NPE because it attempts to access the free_request
function through the endpoint's operations table (ep->ops), which is
NULL.

Refactor the error path to use cascading goto labels, ensuring that
resources are freed in reverse order of allocation. Besides, explicitly
set pointers to NULL after freeing them.

Call trace:
 usb_ep_free_request+0x2c/0xec
 ncm_bind+0x39c/0x3dc
 usb_add_function+0xcc/0x1f0
 configfs_composite_bind+0x468/0x588
 gadget_bind_driver+0x104/0x270
 really_probe+0x190/0x374
 __driver_probe_device+0xa0/0x12c
 driver_probe_device+0x3c/0x218
 __device_attach_driver+0x14c/0x188
 bus_for_each_drv+0x10c/0x168
 __device_attach+0xfc/0x198
 device_initial_probe+0x14/0x24
 bus_probe_device+0x94/0x11c
 device_add+0x268/0x48c
 usb_add_gadget+0x198/0x28c
 dwc3_gadget_init+0x700/0x858
 __dwc3_set_mode+0x3cc/0x664
 process_scheduled_works+0x1d8/0x488
 worker_thread+0x244/0x334
 kthread+0x114/0x1bc
 ret_from_fork+0x10/0x20

Fixes: 9f6ce4240a2b ("usb: gadget: f_ncm.c added")
Cc: stable@kernel.org
Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
---
Tracing:
 ncm_bind: ep_autoconfig OUT failed
 ncm_bind: ncm->notify=0000000060ad7c2d, ncm->notify->ops=0000000000000000
 usb_ep_free_request: (null): req 00000000650c8918 length 0/158 sgs 0/0 stream 0 zsI status 0 --> 0

---
 drivers/usb/gadget/function/f_ncm.c | 37 ++++++++++++++++-------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index 58b0dd575af3..0338cb820cb5 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -1459,7 +1459,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)
 	mutex_unlock(&ncm_opts->lock);

 	if (status)
-		goto fail;
+		goto fail_os_desc;

 	ncm_opts->bound = true;

@@ -1467,7 +1467,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)
 				 ARRAY_SIZE(ncm_string_defs));
 	if (IS_ERR(us)) {
 		status = PTR_ERR(us);
-		goto fail;
+		goto fail_os_desc;
 	}
 	ncm_control_intf.iInterface = us[STRING_CTRL_IDX].id;
 	ncm_data_nop_intf.iInterface = us[STRING_DATA_IDX].id;
@@ -1478,7 +1478,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)
 	/* allocate instance-specific interface IDs */
 	status = usb_interface_id(c, f);
 	if (status < 0)
-		goto fail;
+		goto fail_os_desc;
 	ncm->ctrl_id = status;
 	ncm_iad_desc.bFirstInterface = status;

@@ -1491,7 +1491,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)

 	status = usb_interface_id(c, f);
 	if (status < 0)
-		goto fail;
+		goto fail_os_desc;
 	ncm->data_id = status;

 	ncm_data_nop_intf.bInterfaceNumber = status;
@@ -1505,17 +1505,17 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)
 	/* allocate instance-specific endpoints */
 	ep = usb_ep_autoconfig(cdev->gadget, &fs_ncm_in_desc);
 	if (!ep)
-		goto fail;
+		goto fail_os_desc;
 	ncm->port.in_ep = ep;

 	ep = usb_ep_autoconfig(cdev->gadget, &fs_ncm_out_desc);
 	if (!ep)
-		goto fail;
+		goto fail_os_desc;
 	ncm->port.out_ep = ep;

 	ep = usb_ep_autoconfig(cdev->gadget, &fs_ncm_notify_desc);
 	if (!ep)
-		goto fail;
+		goto fail_os_desc;
 	ncm->notify = ep;

 	status = -ENOMEM;
@@ -1523,10 +1523,10 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)
 	/* allocate notification request and buffer */
 	ncm->notify_req = usb_ep_alloc_request(ep, GFP_KERNEL);
 	if (!ncm->notify_req)
-		goto fail;
+		goto fail_os_desc;
 	ncm->notify_req->buf = kmalloc(NCM_STATUS_BYTECOUNT, GFP_KERNEL);
 	if (!ncm->notify_req->buf)
-		goto fail;
+		goto fail_notify_req;
 	ncm->notify_req->context = ncm;
 	ncm->notify_req->complete = ncm_notify_complete;

@@ -1548,7 +1548,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)
 	status = usb_assign_descriptors(f, ncm_fs_function, ncm_hs_function,
 			ncm_ss_function, ncm_ss_function);
 	if (status)
-		goto fail;
+		goto fail_notify_req_buf;

 	/*
 	 * NOTE:  all that is done without knowing or caring about
@@ -1566,15 +1566,17 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)
 			ncm->notify->name);
 	return 0;

-fail:
+fail_notify_req_buf:
+	kfree(ncm->notify_req->buf);
+	ncm->notify_req->buf = NULL;
+fail_notify_req:
+	usb_ep_free_request(ncm->notify, ncm->notify_req);
+	ncm->notify_req = NULL;
+fail_os_desc:
 	kfree(f->os_desc_table);
+	f->os_desc_table = NULL;
 	f->os_desc_n = 0;

-	if (ncm->notify_req) {
-		kfree(ncm->notify_req->buf);
-		usb_ep_free_request(ncm->notify, ncm->notify_req);
-	}
-
 	ERROR(cdev, "%s: can't bind, err %d\n", f->name, status);

 	return status;
@@ -1734,6 +1736,7 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f)
 	hrtimer_cancel(&ncm->task_timer);

 	kfree(f->os_desc_table);
+	f->os_desc_table = NULL;
 	f->os_desc_n = 0;

 	ncm_string_defs[0].id = 0;
@@ -1745,7 +1748,9 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f)
 	}

 	kfree(ncm->notify_req->buf);
+	ncm->notify_req->buf = NULL;
 	usb_ep_free_request(ncm->notify, ncm->notify_req);
+	ncm->notify_req = NULL;
 }

 static struct usb_function *ncm_alloc(struct usb_function_instance *fi)
--
2.51.0.338.gd7d06c2dae-goog
Re: [PATCH] usb: gadget: f_ncm: Fix NPE in ncm_bind error path
Posted by Greg KH 3 weeks, 5 days ago
On Thu, Sep 04, 2025 at 07:46:13PM +0800, Kuen-Han Tsai wrote:
> When an ncm_bind/unbind cycle occurs, the ncm->notify_req pointer is
> left pointing to a stale address. If a subsequent call to ncm_bind()
> fails to allocate the endpoints, the function jumps to the unified error
> label. The cleanup code sees the stale ncm->notify_req pointer and calls
> usb_ep_free_request().
> 
> This results in a NPE because it attempts to access the free_request
> function through the endpoint's operations table (ep->ops), which is
> NULL.
> 
> Refactor the error path to use cascading goto labels, ensuring that
> resources are freed in reverse order of allocation. Besides, explicitly
> set pointers to NULL after freeing them.

Why must the pointers be set to NULL?  What is checking and requiring
that?

And this unwinding is tailor-made for the guard() type of logic, why not
convert this code to do that instead, which will fix all of these bugs
automatically, right?

thanks,

greg k-h
Re: [PATCH] usb: gadget: f_ncm: Fix NPE in ncm_bind error path
Posted by Kuen-Han Tsai 3 weeks ago
Hi Greg,

On Sat, Sep 6, 2025 at 8:15 PM Greg KH <gregkh@linuxfoundation.org> wrote:
>
> On Thu, Sep 04, 2025 at 07:46:13PM +0800, Kuen-Han Tsai wrote:
> > When an ncm_bind/unbind cycle occurs, the ncm->notify_req pointer is
> > left pointing to a stale address. If a subsequent call to ncm_bind()
> > fails to allocate the endpoints, the function jumps to the unified error
> > label. The cleanup code sees the stale ncm->notify_req pointer and calls
> > usb_ep_free_request().
> >
> > This results in a NPE because it attempts to access the free_request
> > function through the endpoint's operations table (ep->ops), which is
> > NULL.
> >
> > Refactor the error path to use cascading goto labels, ensuring that
> > resources are freed in reverse order of allocation. Besides, explicitly
> > set pointers to NULL after freeing them.
>
> Why must the pointers be set to NULL?  What is checking and requiring
> that?

I set them to null as a standard safety measure to prevent potential
use-after-free issues. I can remove it if you prefer.

>
> And this unwinding is tailor-made for the guard() type of logic, why not
> convert this code to do that instead, which will fix all of these bugs
> automatically, right?

The __free() cleanup mechanism is unfortunately infeasible in this
case. The usb_ep_free_request(ep, req) requires two parameters, but
the automatic cleanup mechanism only needs one: the resource being
freed.

Since the struct usb_request doesn't contain the pointer to its
associated endpoint, the @free function cannot retrieve the ep pointer
it needs for the cleanup call.  We would need to add an endpoint
pointer to usb_request to make it work. However, this will be a
significant change and we might also need to refactor drivers that use
the usb_ep_free_request(ep, req), usb_ep_queue(ep, req) and
usb_ep_dequeue(ep, req) as the endpoint parameter is no longer needed.

I also want to point out that this bug isn't specific to the f_ncm
driver. The f_acm, f_rndis and f_ecm are also vulnerable because their
bind paths have the same flaw. We have already observed this issue in
both f_ncm and f_acm on Android devices.

My plan was to merge this fix for f_ncm first and then apply the same
pattern to the other affected drivers. However, I am happy to have a
more thorough design discussion if you feel using __free()/guard()
automatic cleanup is the better path forward.

Regards,
Kuen-Han
Re: [PATCH] usb: gadget: f_ncm: Fix NPE in ncm_bind error path
Posted by Greg KH 3 weeks ago
On Thu, Sep 11, 2025 at 02:50:15PM +0800, Kuen-Han Tsai wrote:
> Hi Greg,
> 
> On Sat, Sep 6, 2025 at 8:15 PM Greg KH <gregkh@linuxfoundation.org> wrote:
> >
> > On Thu, Sep 04, 2025 at 07:46:13PM +0800, Kuen-Han Tsai wrote:
> > > When an ncm_bind/unbind cycle occurs, the ncm->notify_req pointer is
> > > left pointing to a stale address. If a subsequent call to ncm_bind()
> > > fails to allocate the endpoints, the function jumps to the unified error
> > > label. The cleanup code sees the stale ncm->notify_req pointer and calls
> > > usb_ep_free_request().
> > >
> > > This results in a NPE because it attempts to access the free_request
> > > function through the endpoint's operations table (ep->ops), which is
> > > NULL.
> > >
> > > Refactor the error path to use cascading goto labels, ensuring that
> > > resources are freed in reverse order of allocation. Besides, explicitly
> > > set pointers to NULL after freeing them.
> >
> > Why must the pointers be set to NULL?  What is checking and requiring
> > that?
> 
> I set them to null as a standard safety measure to prevent potential
> use-after-free issues. I can remove it if you prefer.

So either you have a use-after-free, or a NULL crash, either way it's
bad and the real bug should be fixed if this can happen.  If it can not
happen, then there is no need to set this to NULL.

> > And this unwinding is tailor-made for the guard() type of logic, why not
> > convert this code to do that instead, which will fix all of these bugs
> > automatically, right?
> 
> The __free() cleanup mechanism is unfortunately infeasible in this
> case. The usb_ep_free_request(ep, req) requires two parameters, but
> the automatic cleanup mechanism only needs one: the resource being
> freed.
> 
> Since the struct usb_request doesn't contain the pointer to its
> associated endpoint, the @free function cannot retrieve the ep pointer
> it needs for the cleanup call.  We would need to add an endpoint
> pointer to usb_request to make it work. However, this will be a
> significant change and we might also need to refactor drivers that use
> the usb_ep_free_request(ep, req), usb_ep_queue(ep, req) and
> usb_ep_dequeue(ep, req) as the endpoint parameter is no longer needed.

It's odd that the ep is needed to create a request, but it's not saved.
So yes, I think it should be saved, and that will make the cleanup logic
a lot simpler, as well as allow us to use the __free() logic overall.

> I also want to point out that this bug isn't specific to the f_ncm
> driver. The f_acm, f_rndis and f_ecm are also vulnerable because their
> bind paths have the same flaw. We have already observed this issue in
> both f_ncm and f_acm on Android devices.
> 
> My plan was to merge this fix for f_ncm first and then apply the same
> pattern to the other affected drivers. However, I am happy to have a
> more thorough design discussion if you feel using __free()/guard()
> automatic cleanup is the better path forward.

I think all of them need to be fixed up, and by adding the endpoint
pointer to the request, that should help make the logic overall for all
of these drivers simpler and easier to maintain over time.

So yes, if you could do that, it would be wonderful.

thanks,

greg k-h
Re: [PATCH] usb: gadget: f_ncm: Fix NPE in ncm_bind error path
Posted by Krzysztof Kozlowski 3 weeks ago
On 11/09/2025 10:35, Greg KH wrote:
> On Thu, Sep 11, 2025 at 02:50:15PM +0800, Kuen-Han Tsai wrote:
>> Hi Greg,
>>
>> On Sat, Sep 6, 2025 at 8:15 PM Greg KH <gregkh@linuxfoundation.org> wrote:
>>>
>>> On Thu, Sep 04, 2025 at 07:46:13PM +0800, Kuen-Han Tsai wrote:
>>>> When an ncm_bind/unbind cycle occurs, the ncm->notify_req pointer is
>>>> left pointing to a stale address. If a subsequent call to ncm_bind()
>>>> fails to allocate the endpoints, the function jumps to the unified error
>>>> label. The cleanup code sees the stale ncm->notify_req pointer and calls
>>>> usb_ep_free_request().
>>>>
>>>> This results in a NPE because it attempts to access the free_request
>>>> function through the endpoint's operations table (ep->ops), which is
>>>> NULL.
>>>>
>>>> Refactor the error path to use cascading goto labels, ensuring that
>>>> resources are freed in reverse order of allocation. Besides, explicitly
>>>> set pointers to NULL after freeing them.
>>>
>>> Why must the pointers be set to NULL?  What is checking and requiring
>>> that?
>>
>> I set them to null as a standard safety measure to prevent potential
>> use-after-free issues. I can remove it if you prefer.
> 
> So either you have a use-after-free, or a NULL crash, either way it's
> bad and the real bug should be fixed if this can happen.  If it can not
> happen, then there is no need to set this to NULL.


... or there is a second (wrong) free somewhere else, which would crash
and this NULL prevents it. In that case there is a real bug which,
instead of being solved, is being hidden by this NULL assignment making
it even more difficult to find and fix later. :(

Usually that's the case I saw when people null-ify pointer after free.

Best regards,
Krzysztof
Re: [PATCH] usb: gadget: f_ncm: Fix NPE in ncm_bind error path
Posted by Kuen-Han Tsai 3 weeks ago
Hi Greg, Krzysztof,

On Thu, Sep 11, 2025 at 4:49 PM Krzysztof Kozlowski
<krzysztof.kozlowski@linaro.org> wrote:
>
> On 11/09/2025 10:35, Greg KH wrote:
> > On Thu, Sep 11, 2025 at 02:50:15PM +0800, Kuen-Han Tsai wrote:
> >> Hi Greg,
> >>
> >> On Sat, Sep 6, 2025 at 8:15 PM Greg KH <gregkh@linuxfoundation.org> wrote:
> >>>
> >>> On Thu, Sep 04, 2025 at 07:46:13PM +0800, Kuen-Han Tsai wrote:
> >>>> When an ncm_bind/unbind cycle occurs, the ncm->notify_req pointer is
> >>>> left pointing to a stale address. If a subsequent call to ncm_bind()
> >>>> fails to allocate the endpoints, the function jumps to the unified error
> >>>> label. The cleanup code sees the stale ncm->notify_req pointer and calls
> >>>> usb_ep_free_request().
> >>>>
> >>>> This results in a NPE because it attempts to access the free_request
> >>>> function through the endpoint's operations table (ep->ops), which is
> >>>> NULL.
> >>>>
> >>>> Refactor the error path to use cascading goto labels, ensuring that
> >>>> resources are freed in reverse order of allocation. Besides, explicitly
> >>>> set pointers to NULL after freeing them.
> >>>
> >>> Why must the pointers be set to NULL?  What is checking and requiring
> >>> that?
> >>
> >> I set them to null as a standard safety measure to prevent potential
> >> use-after-free issues. I can remove it if you prefer.
> >
> > So either you have a use-after-free, or a NULL crash, either way it's
> > bad and the real bug should be fixed if this can happen.  If it can not
> > happen, then there is no need to set this to NULL.
>
>
> ... or there is a second (wrong) free somewhere else, which would crash
> and this NULL prevents it. In that case there is a real bug which,
> instead of being solved, is being hidden by this NULL assignment making
> it even more difficult to find and fix later. :(
>
> Usually that's the case I saw when people null-ify pointer after free.

I really appreciate you taking the time to explain this. I see your
point clearly now: my change could potentially mask the real bug
rather than fixing it.

I'll rework the patch to use the __free() helpers for automatic cleanup.

Regards,
Kuen-Han