[PATCH v2 7/7] usb: gadget: f_ncm: Fix net_device lifecycle with device_move

Kuen-Han Tsai posted 7 patches 1 month ago
[PATCH v2 7/7] usb: gadget: f_ncm: Fix net_device lifecycle with device_move
Posted by Kuen-Han Tsai 1 month ago
The network device outlived its parent gadget device during
disconnection, resulting in dangling sysfs links and null pointer
dereference problems.

A prior attempt to solve this by removing SET_NETDEV_DEV entirely [1]
was reverted due to power management ordering concerns and a NO-CARRIER
regression.

A subsequent attempt to defer net_device allocation to bind [2] broke
1:1 mapping between function instance and network device, making it
impossible for configfs to report the resolved interface name. This
results in a regression where the DHCP server fails on pmOS.

Use device_move to reparent the net_device between the gadget device and
/sys/devices/virtual/ across bind/unbind cycles. This preserves the
network interface across USB reconnection, allowing the DHCP server to
retain their binding.

Introduce gether_attach_gadget()/gether_detach_gadget() helpers and use
__free(detach_gadget) macro to undo attachment on bind failure. The
bind_count ensures device_move executes only on the first bind.

[1] https://lore.kernel.org/lkml/f2a4f9847617a0929d62025748384092e5f35cce.camel@crapouillou.net/
[2] https://lore.kernel.org/linux-usb/795ea759-7eaf-4f78-81f4-01ffbf2d7961@ixit.cz/

Fixes: 40d133d7f542 ("usb: gadget: f_ncm: convert to new function interface with backward compatibility")
Cc: stable@kernel.org
Signed-off-by: Kuen-Han Tsai <khtsai@google.com>
---
 drivers/usb/gadget/function/f_ncm.c   | 38 +++++++++++++++++++++++------------
 drivers/usb/gadget/function/u_ether.c | 22 ++++++++++++++++++++
 drivers/usb/gadget/function/u_ether.h | 26 ++++++++++++++++++++++++
 drivers/usb/gadget/function/u_ncm.h   |  2 +-
 4 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index 3d772c9beb91..a6fa5ed3d6cb 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -1439,6 +1439,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)
 	struct f_ncm_opts	*ncm_opts;
 
 	struct usb_os_desc_table	*os_desc_table __free(kfree) = NULL;
+	struct net_device		*net __free(detach_gadget) = NULL;
 	struct usb_request		*request __free(free_usb_request) = NULL;
 
 	if (!can_support_ecm(cdev->gadget))
@@ -1452,18 +1453,19 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)
 			return -ENOMEM;
 	}
 
-	mutex_lock(&ncm_opts->lock);
-	gether_set_gadget(ncm_opts->net, cdev->gadget);
-	if (!ncm_opts->bound) {
-		ncm_opts->net->mtu = (ncm_opts->max_segment_size - ETH_HLEN);
-		status = gether_register_netdev(ncm_opts->net);
-	}
-	mutex_unlock(&ncm_opts->lock);
-
-	if (status)
-		return status;
-
-	ncm_opts->bound = true;
+	scoped_guard(mutex, &ncm_opts->lock)
+		if (ncm_opts->bind_count == 0) {
+			if (!device_is_registered(&ncm_opts->net->dev)) {
+				ncm_opts->net->mtu = (ncm_opts->max_segment_size - ETH_HLEN);
+				gether_set_gadget(ncm_opts->net, cdev->gadget);
+				status = gether_register_netdev(ncm_opts->net);
+			} else
+				status = gether_attach_gadget(ncm_opts->net, cdev->gadget);
+
+			if (status)
+				return status;
+			net = ncm_opts->net;
+		}
 
 	ncm_string_defs[1].s = ncm->ethaddr;
 
@@ -1564,6 +1566,9 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)
 	}
 	ncm->notify_req = no_free_ptr(request);
 
+	ncm_opts->bind_count++;
+	retain_and_null_ptr(net);
+
 	DBG(cdev, "CDC Network: IN/%s OUT/%s NOTIFY/%s\n",
 			ncm->port.in_ep->name, ncm->port.out_ep->name,
 			ncm->notify->name);
@@ -1655,7 +1660,7 @@ static void ncm_free_inst(struct usb_function_instance *f)
 	struct f_ncm_opts *opts;
 
 	opts = container_of(f, struct f_ncm_opts, func_inst);
-	if (opts->bound)
+	if (device_is_registered(&opts->net->dev))
 		gether_cleanup(netdev_priv(opts->net));
 	else
 		free_netdev(opts->net);
@@ -1718,9 +1723,12 @@ static void ncm_free(struct usb_function *f)
 static void ncm_unbind(struct usb_configuration *c, struct usb_function *f)
 {
 	struct f_ncm *ncm = func_to_ncm(f);
+	struct f_ncm_opts *ncm_opts;
 
 	DBG(c->cdev, "ncm unbind\n");
 
+	ncm_opts = container_of(f->fi, struct f_ncm_opts, func_inst);
+
 	hrtimer_cancel(&ncm->task_timer);
 
 	kfree(f->os_desc_table);
@@ -1736,6 +1744,10 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f)
 
 	kfree(ncm->notify_req->buf);
 	usb_ep_free_request(ncm->notify, ncm->notify_req);
+
+	ncm_opts->bind_count--;
+	if (ncm_opts->bind_count == 0)
+		gether_detach_gadget(ncm_opts->net);
 }
 
 static struct usb_function *ncm_alloc(struct usb_function_instance *fi)
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index c47965d850d4..1a9e7c495e2e 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -897,6 +897,28 @@ void gether_set_gadget(struct net_device *net, struct usb_gadget *g)
 }
 EXPORT_SYMBOL_GPL(gether_set_gadget);
 
+int gether_attach_gadget(struct net_device *net, struct usb_gadget *g)
+{
+	int ret;
+
+	ret = device_move(&net->dev, &g->dev, DPM_ORDER_DEV_AFTER_PARENT);
+	if (ret)
+		return ret;
+
+	gether_set_gadget(net, g);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gether_attach_gadget);
+
+void gether_detach_gadget(struct net_device *net)
+{
+	struct eth_dev *dev = netdev_priv(net);
+
+	device_move(&net->dev, NULL, DPM_ORDER_NONE);
+	dev->gadget = NULL;
+}
+EXPORT_SYMBOL_GPL(gether_detach_gadget);
+
 int gether_set_dev_addr(struct net_device *net, const char *dev_addr)
 {
 	struct eth_dev *dev;
diff --git a/drivers/usb/gadget/function/u_ether.h b/drivers/usb/gadget/function/u_ether.h
index 34be220cef77..c85a1cf3c115 100644
--- a/drivers/usb/gadget/function/u_ether.h
+++ b/drivers/usb/gadget/function/u_ether.h
@@ -150,6 +150,32 @@ static inline struct net_device *gether_setup_default(void)
  */
 void gether_set_gadget(struct net_device *net, struct usb_gadget *g);
 
+/**
+ * gether_attach_gadget - Reparent net_device to the gadget device.
+ * @net: The network device to reparent.
+ * @g: The target USB gadget device to parent to.
+ *
+ * This function moves the network device to be a child of the USB gadget
+ * device in the device hierarchy. This is typically done when the function
+ * is bound to a configuration.
+ *
+ * Returns 0 on success, or a negative error code on failure.
+ */
+int gether_attach_gadget(struct net_device *net, struct usb_gadget *g);
+
+/**
+ * gether_detach_gadget - Detach net_device from its gadget parent.
+ * @net: The network device to detach.
+ *
+ * This function moves the network device to be a child of the virtual
+ * devices parent, effectively detaching it from the USB gadget device
+ * hierarchy. This is typically done when the function is unbound
+ * from a configuration but the instance is not yet freed.
+ */
+void gether_detach_gadget(struct net_device *net);
+
+DEFINE_FREE(detach_gadget, struct net_device *, if (_T) gether_detach_gadget(_T))
+
 /**
  * gether_set_dev_addr - initialize an ethernet-over-usb link with eth address
  * @net: device representing this link
diff --git a/drivers/usb/gadget/function/u_ncm.h b/drivers/usb/gadget/function/u_ncm.h
index 49ec095cdb4b..b1f3db8b68c1 100644
--- a/drivers/usb/gadget/function/u_ncm.h
+++ b/drivers/usb/gadget/function/u_ncm.h
@@ -18,7 +18,7 @@
 struct f_ncm_opts {
 	struct usb_function_instance	func_inst;
 	struct net_device		*net;
-	bool				bound;
+	int				bind_count;
 
 	struct config_group		*ncm_interf_group;
 	struct usb_os_desc		ncm_os_desc;

-- 
2.53.0.473.g4a7958ca14-goog
Re: [PATCH v2 7/7] usb: gadget: f_ncm: Fix net_device lifecycle with device_move
Posted by Val Packett 3 weeks, 4 days ago
Hi,

On 3/9/26 9:04 AM, Kuen-Han Tsai wrote:
> The network device outlived its parent gadget device during
> disconnection, resulting in dangling sysfs links and null pointer
> dereference problems.
>
> A prior attempt to solve this by removing SET_NETDEV_DEV entirely [1]
> was reverted due to power management ordering concerns and a NO-CARRIER
> regression.
>
> A subsequent attempt to defer net_device allocation to bind [2] broke
> 1:1 mapping between function instance and network device, making it
> impossible for configfs to report the resolved interface name. This
> results in a regression where the DHCP server fails on pmOS.
>
> [..]

I just saw that this was the last commit touching u_ether while 
debugging… the DHCP server failing on pmOS. (In the initrd, even).

Specifically, udev calling ethtool_get_drvinfo and eth_get_drvinfo 
dereferencing an unset dev->gadget:


[    7.528277] [pmOS-rd]:   Setting up USB gadget through configfs
[    7.539437] configfs-gadget.g1 gadget.0: HOST MAC 2a:a6:63:b7:92:23
[    7.545914] configfs-gadget.g1 gadget.0: MAC 76:1d:2b:16:aa:25
[    7.577888] [pmOS-rd]: Trying to start server with parameters: Server 
IP addr: 172.16.42.1:67, client IP addr: 172.16.42.2, interface: usb0
[    7.591522] [pmOS-rd]: Entering debug shell
[    7.597590] Unable to handle kernel NULL pointer dereference at 
virtual address 0000000000000080
[    7.606670] Mem abort info:
[    7.609571]   ESR = 0x0000000096000004
[    7.613462]   EC = 0x25: DABT (current EL), IL = 32 bits
[    7.618942]   SET = 0, FnV = 0
[    7.622105]   EA = 0, S1PTW = 0
[    7.625354]   FSC = 0x04: level 0 translation fault
[    7.630395] Data abort info:
[    7.630398]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
[    7.630401]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[    7.630404]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[    7.630407] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000107b18000
[    7.630411] [0000000000000080] pgd=0000000000000000, p4d=0000000000000000
[    7.630420] Internal error: Oops: 0000000096000004 [#1]  SMP
[    7.630425] Modules linked in: typec msm ubwc_config mdt_loader ocmem 
rtc_pm8xxx drm_gpuvm drm_exec i2c_qcom_geni llcc_qcom gpi gpu_sched 
drm_client_lib phy_qcom_snps_femto_v2 drm_display_helper cec 
drm_dp_aux_bus icc_bwmon drm_kms_helper drm backlight ufs_qcom 
phy_qcom_qmp_ufs icc_osm_l3 pmic_glink pdr_interface qcom_pdr_msg 
qmi_helpers
[    7.630486] CPU: 1 UID: 0 PID: 175 Comm: (udev-worker) Tainted: G    
     W  7.0.0-rc3-next-20260313-00118-gf4f287b6004a-dirty #59 PREEMPT(full)
[    7.630493] Tainted: [W]=WARN
[    7.630495] Hardware name: Motorola edge 30 (DT)
[    7.630499] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS 
BTYPE=--)
[    7.630503] pc : eth_get_drvinfo+0x50/0x90 <..snip..>
[    7.630595] Call trace:
[    7.630598]  eth_get_drvinfo+0x50/0x90 (P)
[    7.630608]  ethtool_get_drvinfo+0x5c/0x1f0
[    7.630617]  __dev_ethtool+0xaec/0x1fe0
[    7.630622]  dev_ethtool+0x134/0x2e0
[    7.630627]  dev_ioctl+0x338/0x560
[    7.630633]  sock_do_ioctl+0xe0/0x128
[    7.630642]  sock_ioctl+0x2cc/0x3e0
[    7.630647]  __arm64_sys_ioctl+0xac/0x108
[    7.630656]  invoke_syscall.constprop.0+0x48/0x100
[    7.630664]  el0_svc_common.constprop.0+0x40/0xe8
[    7.630670]  do_el0_svc+0x24/0x38
[    7.630676]  el0_svc+0x34/0x180
[    7.642931] [pmOS-rd]: /usr/bin/buffyboard
[    7.644473]  el0t_64_sync_handler+0xa0/0xe8
[    7.644482]  el0t_64_sync+0x17c/0x180
[    7.644491] Code: 91094021 94134bd9 f9457680 d2800402 (f9404001)
[    7.644495] ---[ end trace 0000000000000000 ]---

As a "workaround", this works:


--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -113,8 +113,14 @@

      strscpy(p->driver, "g_ether", sizeof(p->driver));
      strscpy(p->version, UETH__VERSION, sizeof(p->version));
-    strscpy(p->fw_version, dev->gadget->name, sizeof(p->fw_version));
-    strscpy(p->bus_info, dev_name(&dev->gadget->dev), sizeof(p->bus_info));
+    if (dev->gadget) {
+        strscpy(p->fw_version, dev->gadget->name, sizeof(p->fw_version));
+        strscpy(p->bus_info, dev_name(&dev->gadget->dev), 
sizeof(p->bus_info));
+    } else {
+        pr_warn("%s: called with no gadget set\n", __func__);
+        strscpy(p->fw_version, "N/A", sizeof(p->fw_version));
+        strscpy(p->bus_info, "platform", sizeof(p->bus_info));
+    }
  }

  /* REVISIT can also support:

..or would that not be a workaround? The lifecycle of gadget being set 
seems kinda decoupled from the lifecycle of the registration (??) And as 
long as it's registered, the dev info can be queried (?)


Thanks,
~val

Re: [PATCH v2 7/7] usb: gadget: f_ncm: Fix net_device lifecycle with device_move
Posted by Kuen-Han Tsai 3 weeks, 3 days ago
Hi Val,

On Sun, Mar 15, 2026 at 1:21 PM Val Packett <val@packett.cool> wrote:
>
> Hi,
>
> On 3/9/26 9:04 AM, Kuen-Han Tsai wrote:
> > The network device outlived its parent gadget device during
> > disconnection, resulting in dangling sysfs links and null pointer
> > dereference problems.
> >
> > A prior attempt to solve this by removing SET_NETDEV_DEV entirely [1]
> > was reverted due to power management ordering concerns and a NO-CARRIER
> > regression.
> >
> > A subsequent attempt to defer net_device allocation to bind [2] broke
> > 1:1 mapping between function instance and network device, making it
> > impossible for configfs to report the resolved interface name. This
> > results in a regression where the DHCP server fails on pmOS.
> >
> > [..]
>
> I just saw that this was the last commit touching u_ether while
> debugging… the DHCP server failing on pmOS. (In the initrd, even).
>
> Specifically, udev calling ethtool_get_drvinfo and eth_get_drvinfo
> dereferencing an unset dev->gadget:

Thanks for the report and testing.

I can reproduce the problem on a Pixel 3 by dropping into the pmOS
debug shell. When pmOS drops into the debug shell, it temporarily
unbinds the gadget to reconfigure the USB functions. Since my recent
patch intentionally reparented the net_device to /sys/devices/virtual
during unbind, dev>gadget became NULL. Meanwhile, the ethtool queries
on the surviving interface, leading to a null pointer dereference.

>
> [    7.528277] [pmOS-rd]:   Setting up USB gadget through configfs
> [    7.539437] configfs-gadget.g1 gadget.0: HOST MAC 2a:a6:63:b7:92:23
> [    7.545914] configfs-gadget.g1 gadget.0: MAC 76:1d:2b:16:aa:25
> [    7.577888] [pmOS-rd]: Trying to start server with parameters: Server
> IP addr: 172.16.42.1:67, client IP addr: 172.16.42.2, interface: usb0
> [    7.591522] [pmOS-rd]: Entering debug shell
> [    7.597590] Unable to handle kernel NULL pointer dereference at
> virtual address 0000000000000080
> [    7.606670] Mem abort info:
> [    7.609571]   ESR = 0x0000000096000004
> [    7.613462]   EC = 0x25: DABT (current EL), IL = 32 bits
> [    7.618942]   SET = 0, FnV = 0
> [    7.622105]   EA = 0, S1PTW = 0
> [    7.625354]   FSC = 0x04: level 0 translation fault
> [    7.630395] Data abort info:
> [    7.630398]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
> [    7.630401]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> [    7.630404]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> [    7.630407] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000107b18000
> [    7.630411] [0000000000000080] pgd=0000000000000000, p4d=0000000000000000
> [    7.630420] Internal error: Oops: 0000000096000004 [#1]  SMP
> [    7.630425] Modules linked in: typec msm ubwc_config mdt_loader ocmem
> rtc_pm8xxx drm_gpuvm drm_exec i2c_qcom_geni llcc_qcom gpi gpu_sched
> drm_client_lib phy_qcom_snps_femto_v2 drm_display_helper cec
> drm_dp_aux_bus icc_bwmon drm_kms_helper drm backlight ufs_qcom
> phy_qcom_qmp_ufs icc_osm_l3 pmic_glink pdr_interface qcom_pdr_msg
> qmi_helpers
> [    7.630486] CPU: 1 UID: 0 PID: 175 Comm: (udev-worker) Tainted: G
>      W  7.0.0-rc3-next-20260313-00118-gf4f287b6004a-dirty #59 PREEMPT(full)
> [    7.630493] Tainted: [W]=WARN
> [    7.630495] Hardware name: Motorola edge 30 (DT)
> [    7.630499] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS
> BTYPE=--)
> [    7.630503] pc : eth_get_drvinfo+0x50/0x90 <..snip..>
> [    7.630595] Call trace:
> [    7.630598]  eth_get_drvinfo+0x50/0x90 (P)
> [    7.630608]  ethtool_get_drvinfo+0x5c/0x1f0
> [    7.630617]  __dev_ethtool+0xaec/0x1fe0
> [    7.630622]  dev_ethtool+0x134/0x2e0
> [    7.630627]  dev_ioctl+0x338/0x560
> [    7.630633]  sock_do_ioctl+0xe0/0x128
> [    7.630642]  sock_ioctl+0x2cc/0x3e0
> [    7.630647]  __arm64_sys_ioctl+0xac/0x108
> [    7.630656]  invoke_syscall.constprop.0+0x48/0x100
> [    7.630664]  el0_svc_common.constprop.0+0x40/0xe8
> [    7.630670]  do_el0_svc+0x24/0x38
> [    7.630676]  el0_svc+0x34/0x180
> [    7.642931] [pmOS-rd]: /usr/bin/buffyboard
> [    7.644473]  el0t_64_sync_handler+0xa0/0xe8
> [    7.644482]  el0t_64_sync+0x17c/0x180
> [    7.644491] Code: 91094021 94134bd9 f9457680 d2800402 (f9404001)
> [    7.644495] ---[ end trace 0000000000000000 ]---
>
> As a "workaround", this works:
>
>
> --- a/drivers/usb/gadget/function/u_ether.c
> +++ b/drivers/usb/gadget/function/u_ether.c
> @@ -113,8 +113,14 @@
>
>       strscpy(p->driver, "g_ether", sizeof(p->driver));
>       strscpy(p->version, UETH__VERSION, sizeof(p->version));
> -    strscpy(p->fw_version, dev->gadget->name, sizeof(p->fw_version));
> -    strscpy(p->bus_info, dev_name(&dev->gadget->dev), sizeof(p->bus_info));
> +    if (dev->gadget) {
> +        strscpy(p->fw_version, dev->gadget->name, sizeof(p->fw_version));
> +        strscpy(p->bus_info, dev_name(&dev->gadget->dev),
> sizeof(p->bus_info));
> +    } else {
> +        pr_warn("%s: called with no gadget set\n", __func__);
> +        strscpy(p->fw_version, "N/A", sizeof(p->fw_version));
> +        strscpy(p->bus_info, "platform", sizeof(p->bus_info));
> +    }
>   }
>
>   /* REVISIT can also support:
>
> ..or would that not be a workaround? The lifecycle of gadget being set
> seems kinda decoupled from the lifecycle of the registration (??) And as
> long as it's registered, the dev info can be queried (?)
>
>
> Thanks,
> ~val
>

I believe your suggested fix correctly handles this detached state.
One minor suggestion: looking at ethtool_get_drvinfo() in
net/ethtool/ioctl.c, we can simply skip the strscpy calls entirely
when dev->gadget is NULL. ethtool_get_drvinfo() checks if bus_info or
fw_version are empty strings and handles the fallback natively, so we
don't need to explicitly copy "N/A" or "platform".

I'll send out a standalone fix shortly and will include the
Suggested-by and Reported-by tags for you. Thanks again for catching
this!

Regards,
Kuen-Han