[PATCH v2] media: imon: make send_packet() more robust

Tetsuo Handa posted 1 patch 2 months, 3 weeks ago
There is a newer version of this series
drivers/media/rc/imon.c | 69 +++++++++++++++++++++++++----------------
1 file changed, 42 insertions(+), 27 deletions(-)
[PATCH v2] media: imon: make send_packet() more robust
Posted by Tetsuo Handa 2 months, 3 weeks ago
syzbot is reporting that imon has three problems which result in
hung tasks due to forever holding device lock [1].

First problem is that when usb_rx_callback_intf0() once got -EPROTO error
after ictx->dev_present_intf0 became true, usb_rx_callback_intf0()
resubmits urb after printk(), and resubmitted urb causes
usb_rx_callback_intf0() to again get -EPROTO error. This results in
printk() flooding (RCU stalls).

Alan Stern commented [2] that

  In theory it's okay to resubmit _if_ the driver has a robust
  error-recovery scheme (such as giving up after some fixed limit on the
  number of errors or after some fixed time has elapsed, perhaps with a
  time delay to prevent a flood of errors).  Most drivers don't bother to
  do this; they simply give up right away.  This makes them more
  vulnerable to short-term noise interference during USB transfers, but in
  reality such interference is quite rare.  There's nothing really wrong
  with giving up right away.

but imon has a poor error-recovery scheme which just retries forever;
this behavior should be fixed.

Since I'm not sure whether it is safe for imon users to give up upon any
error code, this patch takes care of only union of error codes chosen from
modules in drivers/media/rc/ directory which handle -EPROTO error (i.e.
ir_toy, mceusb and igorplugusb).

Second problem is that when usb_rx_callback_intf0() once got -EPROTO error
before ictx->dev_present_intf0 becomes true, usb_rx_callback_intf0() always
resubmits urb due to commit 8791d63af0cf ("[media] imon: don't wedge
hardware after early callbacks"). The ictx->dev_present_intf0 test was
introduced by commit 6f6b90c9231a ("[media] imon: don't parse scancodes
until intf configured"), but that commit did not call usb_unlink_urb()
when usb_rx_callback_intf0() got an error. Move the ictx->dev_present_intf0
test to immediately before imon_incoming_packet() so that we can call
usb_unlink_urb() as needed, or the first problem explained above happens
without printk() flooding (i.e. hung task).

Third problem is that when usb_rx_callback_intf0() is not called for some
reason (e.g. flaky hardware; the reproducer for this problem sometimes
prevents usb_rx_callback_intf0() from being called),
wait_for_completion_interruptible() in send_packet() never returns (i.e.
hung task). As a workaround for such situation, change send_packet() to
wait for completion with timeout of 10 seconds.

Also, move mutex_trylock() in imon_ir_change_protocol() to the beginning,
for memcpy() which modifies ictx->usb_tx_buf should be protected by
ictx->lock.

Also, verify at the beginning of send_packet() that ictx->lock is held
in case send_packet() is by error called from imon_ir_change_protocol()
when mutex_trylock() failed due to concurrent requests.

Link: https://syzkaller.appspot.com/bug?extid=592e2ab8775dbe0bf09a [1]
Link: https://lkml.kernel.org/r/d6da6709-d799-4be3-a695-850bddd6eb24@rowland.harvard.edu [2]
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
Changes in v2:
  Updated patch description.

 drivers/media/rc/imon.c | 69 +++++++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 27 deletions(-)

diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c
index f5221b018808..3469a401a572 100644
--- a/drivers/media/rc/imon.c
+++ b/drivers/media/rc/imon.c
@@ -598,6 +598,8 @@ static int send_packet(struct imon_context *ictx)
 	int retval = 0;
 	struct usb_ctrlrequest *control_req = NULL;
 
+	lockdep_assert_held(&ictx->lock);
+
 	/* Check if we need to use control or interrupt urb */
 	if (!ictx->tx_control) {
 		pipe = usb_sndintpipe(ictx->usbdev_intf0,
@@ -645,12 +647,15 @@ static int send_packet(struct imon_context *ictx)
 		smp_rmb(); /* ensure later readers know we're not busy */
 		pr_err_ratelimited("error submitting urb(%d)\n", retval);
 	} else {
-		/* Wait for transmission to complete (or abort) */
-		retval = wait_for_completion_interruptible(
-				&ictx->tx.finished);
-		if (retval) {
+		/* Wait for transmission to complete (or abort or timeout) */
+		retval = wait_for_completion_interruptible_timeout(&ictx->tx.finished, 10 * HZ);
+		if (retval <= 0) {
 			usb_kill_urb(ictx->tx_urb);
 			pr_err_ratelimited("task interrupted\n");
+			if (retval < 0)
+				ictx->tx.status = retval;
+			else
+				ictx->tx.status = -ETIMEDOUT;
 		}
 
 		ictx->tx.busy = false;
@@ -1121,7 +1126,7 @@ static int imon_ir_change_protocol(struct rc_dev *rc, u64 *rc_proto)
 	int retval;
 	struct imon_context *ictx = rc->priv;
 	struct device *dev = ictx->dev;
-	bool unlock = false;
+	const bool unlock = mutex_trylock(&ictx->lock);
 	unsigned char ir_proto_packet[] = {
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86 };
 
@@ -1148,8 +1153,6 @@ static int imon_ir_change_protocol(struct rc_dev *rc, u64 *rc_proto)
 
 	memcpy(ictx->usb_tx_buf, &ir_proto_packet, sizeof(ir_proto_packet));
 
-	unlock = mutex_trylock(&ictx->lock);
-
 	retval = send_packet(ictx);
 	if (retval)
 		goto out;
@@ -1745,14 +1748,6 @@ static void usb_rx_callback_intf0(struct urb *urb)
 	if (!ictx)
 		return;
 
-	/*
-	 * if we get a callback before we're done configuring the hardware, we
-	 * can't yet process the data, as there's nowhere to send it, but we
-	 * still need to submit a new rx URB to avoid wedging the hardware
-	 */
-	if (!ictx->dev_present_intf0)
-		goto out;
-
 	switch (urb->status) {
 	case -ENOENT:		/* usbcore unlink successful! */
 		return;
@@ -1761,16 +1756,30 @@ static void usb_rx_callback_intf0(struct urb *urb)
 		break;
 
 	case 0:
-		imon_incoming_packet(ictx, urb, intfnum);
+		/*
+		 * if we get a callback before we're done configuring the hardware, we
+		 * can't yet process the data, as there's nowhere to send it, but we
+		 * still need to submit a new rx URB to avoid wedging the hardware
+		 */
+		if (ictx->dev_present_intf0)
+			imon_incoming_packet(ictx, urb, intfnum);
 		break;
 
+	case -ECONNRESET:
+	case -EILSEQ:
+	case -EPROTO:
+	case -EPIPE:
+		dev_warn(ictx->dev, "imon %s: status(%d)\n",
+			 __func__, urb->status);
+		usb_unlink_urb(urb);
+		return;
+
 	default:
 		dev_warn(ictx->dev, "imon %s: status(%d): ignored\n",
 			 __func__, urb->status);
 		break;
 	}
 
-out:
 	usb_submit_urb(ictx->rx_urb_intf0, GFP_ATOMIC);
 }
 
@@ -1786,14 +1795,6 @@ static void usb_rx_callback_intf1(struct urb *urb)
 	if (!ictx)
 		return;
 
-	/*
-	 * if we get a callback before we're done configuring the hardware, we
-	 * can't yet process the data, as there's nowhere to send it, but we
-	 * still need to submit a new rx URB to avoid wedging the hardware
-	 */
-	if (!ictx->dev_present_intf1)
-		goto out;
-
 	switch (urb->status) {
 	case -ENOENT:		/* usbcore unlink successful! */
 		return;
@@ -1802,16 +1803,30 @@ static void usb_rx_callback_intf1(struct urb *urb)
 		break;
 
 	case 0:
-		imon_incoming_packet(ictx, urb, intfnum);
+		/*
+		 * if we get a callback before we're done configuring the hardware, we
+		 * can't yet process the data, as there's nowhere to send it, but we
+		 * still need to submit a new rx URB to avoid wedging the hardware
+		 */
+		if (ictx->dev_present_intf1)
+			imon_incoming_packet(ictx, urb, intfnum);
 		break;
 
+	case -ECONNRESET:
+	case -EILSEQ:
+	case -EPROTO:
+	case -EPIPE:
+		dev_warn(ictx->dev, "imon %s: status(%d)\n",
+			 __func__, urb->status);
+		usb_unlink_urb(urb);
+		return;
+
 	default:
 		dev_warn(ictx->dev, "imon %s: status(%d): ignored\n",
 			 __func__, urb->status);
 		break;
 	}
 
-out:
 	usb_submit_urb(ictx->rx_urb_intf1, GFP_ATOMIC);
 }
 
-- 
2.50.1
Re: [PATCH v2] media: imon: make send_packet() more robust
Posted by Alan Stern 2 months, 3 weeks ago
On Wed, Jul 16, 2025 at 11:07:17PM +0900, Tetsuo Handa wrote:
> syzbot is reporting that imon has three problems which result in
> hung tasks due to forever holding device lock [1].
> 
> First problem is that when usb_rx_callback_intf0() once got -EPROTO error
> after ictx->dev_present_intf0 became true, usb_rx_callback_intf0()
> resubmits urb after printk(), and resubmitted urb causes
> usb_rx_callback_intf0() to again get -EPROTO error. This results in
> printk() flooding (RCU stalls).
> 
> Alan Stern commented [2] that
> 
>   In theory it's okay to resubmit _if_ the driver has a robust
>   error-recovery scheme (such as giving up after some fixed limit on the
>   number of errors or after some fixed time has elapsed, perhaps with a
>   time delay to prevent a flood of errors).  Most drivers don't bother to
>   do this; they simply give up right away.  This makes them more
>   vulnerable to short-term noise interference during USB transfers, but in
>   reality such interference is quite rare.  There's nothing really wrong
>   with giving up right away.
> 
> but imon has a poor error-recovery scheme which just retries forever;
> this behavior should be fixed.
> 
> Since I'm not sure whether it is safe for imon users to give up upon any
> error code, this patch takes care of only union of error codes chosen from
> modules in drivers/media/rc/ directory which handle -EPROTO error (i.e.
> ir_toy, mceusb and igorplugusb).
> 
> Second problem is that when usb_rx_callback_intf0() once got -EPROTO error
> before ictx->dev_present_intf0 becomes true, usb_rx_callback_intf0() always
> resubmits urb due to commit 8791d63af0cf ("[media] imon: don't wedge
> hardware after early callbacks"). The ictx->dev_present_intf0 test was
> introduced by commit 6f6b90c9231a ("[media] imon: don't parse scancodes
> until intf configured"), but that commit did not call usb_unlink_urb()
> when usb_rx_callback_intf0() got an error. Move the ictx->dev_present_intf0
> test to immediately before imon_incoming_packet() so that we can call
> usb_unlink_urb() as needed, or the first problem explained above happens
> without printk() flooding (i.e. hung task).
> 
> Third problem is that when usb_rx_callback_intf0() is not called for some
> reason (e.g. flaky hardware; the reproducer for this problem sometimes
> prevents usb_rx_callback_intf0() from being called),
> wait_for_completion_interruptible() in send_packet() never returns (i.e.
> hung task). As a workaround for such situation, change send_packet() to
> wait for completion with timeout of 10 seconds.
> 
> Also, move mutex_trylock() in imon_ir_change_protocol() to the beginning,
> for memcpy() which modifies ictx->usb_tx_buf should be protected by
> ictx->lock.
> 
> Also, verify at the beginning of send_packet() that ictx->lock is held
> in case send_packet() is by error called from imon_ir_change_protocol()
> when mutex_trylock() failed due to concurrent requests.
> 
> Link: https://syzkaller.appspot.com/bug?extid=592e2ab8775dbe0bf09a [1]
> Link: https://lkml.kernel.org/r/d6da6709-d799-4be3-a695-850bddd6eb24@rowland.harvard.edu [2]
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> ---

Making multiple significant changes like these in a single patch is 
generally a bad idea.  It would be better to split this up into two or 
three patches, each doing one thing.

> Changes in v2:
>   Updated patch description.
> 
>  drivers/media/rc/imon.c | 69 +++++++++++++++++++++++++----------------
>  1 file changed, 42 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c
> index f5221b018808..3469a401a572 100644
> --- a/drivers/media/rc/imon.c
> +++ b/drivers/media/rc/imon.c

> @@ -1761,16 +1756,30 @@ static void usb_rx_callback_intf0(struct urb *urb)
>  		break;
>  
>  	case 0:
> -		imon_incoming_packet(ictx, urb, intfnum);
> +		/*
> +		 * if we get a callback before we're done configuring the hardware, we
> +		 * can't yet process the data, as there's nowhere to send it, but we
> +		 * still need to submit a new rx URB to avoid wedging the hardware
> +		 */
> +		if (ictx->dev_present_intf0)
> +			imon_incoming_packet(ictx, urb, intfnum);
>  		break;
>  
> +	case -ECONNRESET:
> +	case -EILSEQ:
> +	case -EPROTO:
> +	case -EPIPE:
> +		dev_warn(ictx->dev, "imon %s: status(%d)\n",
> +			 __func__, urb->status);
> +		usb_unlink_urb(urb);

The URB you're unlinking here is the one that just completed, right?  
Which means it's already unlinked, so this call is unnecessary.

> @@ -1802,16 +1803,30 @@ static void usb_rx_callback_intf1(struct urb *urb)
>  		break;
>  
>  	case 0:
> -		imon_incoming_packet(ictx, urb, intfnum);
> +		/*
> +		 * if we get a callback before we're done configuring the hardware, we
> +		 * can't yet process the data, as there's nowhere to send it, but we
> +		 * still need to submit a new rx URB to avoid wedging the hardware
> +		 */
> +		if (ictx->dev_present_intf1)
> +			imon_incoming_packet(ictx, urb, intfnum);
>  		break;
>  
> +	case -ECONNRESET:
> +	case -EILSEQ:
> +	case -EPROTO:
> +	case -EPIPE:
> +		dev_warn(ictx->dev, "imon %s: status(%d)\n",
> +			 __func__, urb->status);
> +		usb_unlink_urb(urb);
> +		return;

Same here.

Alan Stern
[PATCH v3] media: imon: make send_packet() more robust
Posted by Tetsuo Handa 2 months, 3 weeks ago
syzbot is reporting that imon has three problems which result in
hung tasks due to forever holding device lock [1].

First problem is that when usb_rx_callback_intf0() once got -EPROTO error
after ictx->dev_present_intf0 became true, usb_rx_callback_intf0()
resubmits urb after printk(), and resubmitted urb causes
usb_rx_callback_intf0() to again get -EPROTO error. This results in
printk() flooding (RCU stalls).

Alan Stern commented [2] that

  In theory it's okay to resubmit _if_ the driver has a robust
  error-recovery scheme (such as giving up after some fixed limit on the
  number of errors or after some fixed time has elapsed, perhaps with a
  time delay to prevent a flood of errors).  Most drivers don't bother to
  do this; they simply give up right away.  This makes them more
  vulnerable to short-term noise interference during USB transfers, but in
  reality such interference is quite rare.  There's nothing really wrong
  with giving up right away.

but imon has a poor error-recovery scheme which just retries forever;
this behavior should be fixed.

Since I'm not sure whether it is safe for imon users to give up upon any
error code, this patch takes care of only union of error codes chosen from
modules in drivers/media/rc/ directory which handle -EPROTO error (i.e.
ir_toy, mceusb and igorplugusb).

Second problem is that when usb_rx_callback_intf0() once got -EPROTO error
before ictx->dev_present_intf0 becomes true, usb_rx_callback_intf0() always
resubmits urb due to commit 8791d63af0cf ("[media] imon: don't wedge
hardware after early callbacks"). Move the ictx->dev_present_intf0 test
introduced by commit 6f6b90c9231a ("[media] imon: don't parse scancodes
until intf configured") to immediately before imon_incoming_packet(), or
the first problem explained above happens without printk() flooding (i.e.
hung task).

Third problem is that when usb_rx_callback_intf0() is not called for some
reason (e.g. flaky hardware; the reproducer for this problem sometimes
prevents usb_rx_callback_intf0() from being called),
wait_for_completion_interruptible() in send_packet() never returns (i.e.
hung task). As a workaround for such situation, change send_packet() to
wait for completion with timeout of 10 seconds.

Link: https://syzkaller.appspot.com/bug?extid=592e2ab8775dbe0bf09a [1]
Link: https://lkml.kernel.org/r/d6da6709-d799-4be3-a695-850bddd6eb24@rowland.harvard.edu [2]
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
Changes in v3:
  Dropped usb_unlink_urb() call from  usb_rx_callback_intf{0,1}().
  Dropped ictx->lock change for sending as a separate patch.

Changes in v2:
  Updated patch description.

 drivers/media/rc/imon.c | 61 +++++++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 24 deletions(-)

diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c
index f5221b018808..b914dd39c21c 100644
--- a/drivers/media/rc/imon.c
+++ b/drivers/media/rc/imon.c
@@ -645,12 +645,15 @@ static int send_packet(struct imon_context *ictx)
 		smp_rmb(); /* ensure later readers know we're not busy */
 		pr_err_ratelimited("error submitting urb(%d)\n", retval);
 	} else {
-		/* Wait for transmission to complete (or abort) */
-		retval = wait_for_completion_interruptible(
-				&ictx->tx.finished);
-		if (retval) {
+		/* Wait for transmission to complete (or abort or timeout) */
+		retval = wait_for_completion_interruptible_timeout(&ictx->tx.finished, 10 * HZ);
+		if (retval <= 0) {
 			usb_kill_urb(ictx->tx_urb);
 			pr_err_ratelimited("task interrupted\n");
+			if (retval < 0)
+				ictx->tx.status = retval;
+			else
+				ictx->tx.status = -ETIMEDOUT;
 		}
 
 		ictx->tx.busy = false;
@@ -1745,14 +1748,6 @@ static void usb_rx_callback_intf0(struct urb *urb)
 	if (!ictx)
 		return;
 
-	/*
-	 * if we get a callback before we're done configuring the hardware, we
-	 * can't yet process the data, as there's nowhere to send it, but we
-	 * still need to submit a new rx URB to avoid wedging the hardware
-	 */
-	if (!ictx->dev_present_intf0)
-		goto out;
-
 	switch (urb->status) {
 	case -ENOENT:		/* usbcore unlink successful! */
 		return;
@@ -1761,16 +1756,29 @@ static void usb_rx_callback_intf0(struct urb *urb)
 		break;
 
 	case 0:
-		imon_incoming_packet(ictx, urb, intfnum);
+		/*
+		 * if we get a callback before we're done configuring the hardware, we
+		 * can't yet process the data, as there's nowhere to send it, but we
+		 * still need to submit a new rx URB to avoid wedging the hardware
+		 */
+		if (ictx->dev_present_intf0)
+			imon_incoming_packet(ictx, urb, intfnum);
 		break;
 
+	case -ECONNRESET:
+	case -EILSEQ:
+	case -EPROTO:
+	case -EPIPE:
+		dev_warn(ictx->dev, "imon %s: status(%d)\n",
+			 __func__, urb->status);
+		return;
+
 	default:
 		dev_warn(ictx->dev, "imon %s: status(%d): ignored\n",
 			 __func__, urb->status);
 		break;
 	}
 
-out:
 	usb_submit_urb(ictx->rx_urb_intf0, GFP_ATOMIC);
 }
 
@@ -1786,14 +1794,6 @@ static void usb_rx_callback_intf1(struct urb *urb)
 	if (!ictx)
 		return;
 
-	/*
-	 * if we get a callback before we're done configuring the hardware, we
-	 * can't yet process the data, as there's nowhere to send it, but we
-	 * still need to submit a new rx URB to avoid wedging the hardware
-	 */
-	if (!ictx->dev_present_intf1)
-		goto out;
-
 	switch (urb->status) {
 	case -ENOENT:		/* usbcore unlink successful! */
 		return;
@@ -1802,16 +1802,29 @@ static void usb_rx_callback_intf1(struct urb *urb)
 		break;
 
 	case 0:
-		imon_incoming_packet(ictx, urb, intfnum);
+		/*
+		 * if we get a callback before we're done configuring the hardware, we
+		 * can't yet process the data, as there's nowhere to send it, but we
+		 * still need to submit a new rx URB to avoid wedging the hardware
+		 */
+		if (ictx->dev_present_intf1)
+			imon_incoming_packet(ictx, urb, intfnum);
 		break;
 
+	case -ECONNRESET:
+	case -EILSEQ:
+	case -EPROTO:
+	case -EPIPE:
+		dev_warn(ictx->dev, "imon %s: status(%d)\n",
+			 __func__, urb->status);
+		return;
+
 	default:
 		dev_warn(ictx->dev, "imon %s: status(%d): ignored\n",
 			 __func__, urb->status);
 		break;
 	}
 
-out:
 	usb_submit_urb(ictx->rx_urb_intf1, GFP_ATOMIC);
 }
 
-- 
2.50.1