When an IRQ worker is running, unplugging the device would cause a
crash. The sealevel hardware this driver was written for was not
hotpluggable, so I never realized it.
This change uses RCU to create a list of workers, which it tears down on
disconnect.
Signed-off-by: Mary Strodl <mstrodl@csh.rit.edu>
---
drivers/gpio/gpio-mpsse.c | 119 +++++++++++++++++++++++++++++++++++---
1 file changed, 110 insertions(+), 9 deletions(-)
diff --git a/drivers/gpio/gpio-mpsse.c b/drivers/gpio/gpio-mpsse.c
index 9f42bb30b4ec..af9a9196ac9d 100644
--- a/drivers/gpio/gpio-mpsse.c
+++ b/drivers/gpio/gpio-mpsse.c
@@ -10,6 +10,7 @@
#include <linux/cleanup.h>
#include <linux/gpio/driver.h>
#include <linux/mutex.h>
+#include <linux/spinlock.h>
#include <linux/usb.h>
struct mpsse_priv {
@@ -17,8 +18,10 @@ struct mpsse_priv {
struct usb_device *udev; /* USB device encompassing all MPSSEs */
struct usb_interface *intf; /* USB interface for this MPSSE */
u8 intf_id; /* USB interface number for this MPSSE */
- struct work_struct irq_work; /* polling work thread */
+ struct list_head workers; /* polling work threads */
struct mutex irq_mutex; /* lock over irq_data */
+ struct mutex irq_race; /* race for polling worker teardown */
+ raw_spinlock_t irq_spin; /* protects worker list */
atomic_t irq_type[16]; /* pin -> edge detection type */
atomic_t irq_enabled;
int id;
@@ -34,6 +37,15 @@ struct mpsse_priv {
struct mutex io_mutex; /* sync I/O with disconnect */
};
+struct mpsse_worker {
+ struct mpsse_priv *priv;
+ struct work_struct work;
+ atomic_t cancelled;
+ struct list_head list; /* linked list */
+ struct list_head destroy; /* teardown linked list */
+ struct rcu_head rcu; /* synchronization */
+};
+
struct bulk_desc {
bool tx; /* direction of bulk transfer */
u8 *data; /* input (tx) or output (rx) */
@@ -261,9 +273,8 @@ static int gpio_mpsse_direction_input(struct gpio_chip *chip,
guard(mutex)(&priv->io_mutex);
priv->gpio_dir[bank] &= ~BIT(bank_offset);
- gpio_mpsse_set_bank(priv, bank);
- return 0;
+ return gpio_mpsse_set_bank(priv, bank);
}
static int gpio_mpsse_get_direction(struct gpio_chip *chip,
@@ -284,18 +295,55 @@ static int gpio_mpsse_get_direction(struct gpio_chip *chip,
return ret;
}
-static void gpio_mpsse_poll(struct work_struct *work)
+static void gpio_mpsse_stop(struct mpsse_worker *worker)
+{
+ cancel_work_sync(&worker->work);
+ kfree(worker);
+}
+
+static void gpio_mpsse_poll(struct work_struct *my_work)
{
unsigned long pin_mask, pin_states, flags;
int irq_enabled, offset, err, value, fire_irq,
irq, old_value[16], irq_type[16];
- struct mpsse_priv *priv = container_of(work, struct mpsse_priv,
- irq_work);
+ struct mpsse_worker *worker;
+ struct mpsse_worker *my_worker = container_of(my_work, struct mpsse_worker, work);
+ struct mpsse_priv *priv = my_worker->priv;
+ struct list_head destructors = LIST_HEAD_INIT(destructors);
for (offset = 0; offset < priv->gpio.ngpio; ++offset)
old_value[offset] = -1;
- while ((irq_enabled = atomic_read(&priv->irq_enabled))) {
+ /*
+ * We only want one worker. Workers race to acquire irq_race and tear
+ * down all other workers. This is a cond guard so that we don't deadlock
+ * trying to cancel a worker.
+ */
+ scoped_cond_guard(mutex_try, ;, &priv->irq_race) {
+ scoped_guard(rcu) {
+ list_for_each_entry_rcu(worker, &priv->workers, list) {
+ /* Don't stop ourselves */
+ if (worker == my_worker)
+ continue;
+
+ scoped_guard(raw_spinlock_irqsave, &priv->irq_spin)
+ list_del_rcu(&worker->list);
+
+ /* Give worker a chance to terminate itself */
+ atomic_set(&worker->cancelled, 1);
+ /* Keep track of stuff to cancel */
+ INIT_LIST_HEAD(&worker->destroy);
+ list_add(&worker->destroy, &destructors);
+ }
+ }
+ /* Make sure list consumers are finished before we tear down */
+ synchronize_rcu();
+ list_for_each_entry(worker, &destructors, destroy)
+ gpio_mpsse_stop(worker);
+ }
+
+ while ((irq_enabled = atomic_read(&priv->irq_enabled)) &&
+ !atomic_read(&my_worker->cancelled)) {
usleep_range(MPSSE_POLL_INTERVAL, MPSSE_POLL_INTERVAL + 1000);
/* Cleanup will trigger at the end of the loop */
guard(mutex)(&priv->irq_mutex);
@@ -370,21 +418,41 @@ static int gpio_mpsse_set_irq_type(struct irq_data *irqd, unsigned int type)
static void gpio_mpsse_irq_disable(struct irq_data *irqd)
{
+ struct mpsse_worker *worker;
struct mpsse_priv *priv = irq_data_get_irq_chip_data(irqd);
atomic_and(~BIT(irqd->hwirq), &priv->irq_enabled);
gpiochip_disable_irq(&priv->gpio, irqd->hwirq);
+
+ /* Can't actually do teardown in IRQ context (blocks...) */
+ scoped_guard(rcu)
+ list_for_each_entry_rcu(worker, &priv->workers, list)
+ atomic_set(&worker->cancelled, 1);
}
static void gpio_mpsse_irq_enable(struct irq_data *irqd)
{
+ struct mpsse_worker *worker;
struct mpsse_priv *priv = irq_data_get_irq_chip_data(irqd);
gpiochip_enable_irq(&priv->gpio, irqd->hwirq);
/* If no-one else was using the IRQ, enable it */
if (!atomic_fetch_or(BIT(irqd->hwirq), &priv->irq_enabled)) {
- INIT_WORK(&priv->irq_work, gpio_mpsse_poll);
- schedule_work(&priv->irq_work);
+ /*
+ * Can't be devm because it uses a non-raw spinlock (illegal in
+ * this context, where a raw spinlock is held by our caller)
+ */
+ worker = kzalloc(sizeof(*worker), GFP_KERNEL);
+ if (!worker)
+ return;
+
+ worker->priv = priv;
+ INIT_LIST_HEAD(&worker->list);
+ INIT_WORK(&worker->work, gpio_mpsse_poll);
+ schedule_work(&worker->work);
+
+ scoped_guard(raw_spinlock_irqsave, &priv->irq_spin)
+ list_add_rcu(&worker->list, &priv->workers);
}
}
@@ -436,6 +504,12 @@ static int gpio_mpsse_probe(struct usb_interface *interface,
if (err)
return err;
+ err = devm_mutex_init(dev, &priv->irq_race);
+ if (err)
+ return err;
+
+ raw_spin_lock_init(&priv->irq_spin);
+
priv->gpio.label = devm_kasprintf(dev, GFP_KERNEL,
"gpio-mpsse.%d.%d",
priv->id, priv->intf_id);
@@ -504,7 +578,34 @@ static int gpio_mpsse_probe(struct usb_interface *interface,
static void gpio_mpsse_disconnect(struct usb_interface *intf)
{
+ struct mpsse_worker *worker;
struct mpsse_priv *priv = usb_get_intfdata(intf);
+ struct list_head destructors = LIST_HEAD_INIT(destructors);
+
+ /*
+ * Lock prevents double-free of worker from here and the teardown
+ * step at the beginning of gpio_mpsse_poll
+ */
+ scoped_guard(mutex, &priv->irq_race) {
+ scoped_guard(rcu) {
+ list_for_each_entry_rcu(worker, &priv->workers, list) {
+ scoped_guard(raw_spinlock_irqsave, &priv->irq_spin)
+ list_del_rcu(&worker->list);
+
+ /* Give worker a chance to terminate itself */
+ atomic_set(&worker->cancelled, 1);
+ /* Keep track of stuff to cancel */
+ INIT_LIST_HEAD(&worker->destroy);
+ list_add(&worker->destroy, &destructors);
+ }
+ }
+ /* Make sure list consumers are finished before we tear down */
+ synchronize_rcu();
+ list_for_each_entry(worker, &destructors, destroy)
+ gpio_mpsse_stop(worker);
+ }
+
+ rcu_barrier();
priv->intf = NULL;
usb_set_intfdata(intf, NULL);
--
2.47.0
On Tue, Sep 23, 2025 at 3:35 PM Mary Strodl <mstrodl@csh.rit.edu> wrote: > When an IRQ worker is running, unplugging the device would cause a > crash. The sealevel hardware this driver was written for was not > hotpluggable, so I never realized it. > > This change uses RCU to create a list of workers, which it tears down on > disconnect. > > Signed-off-by: Mary Strodl <mstrodl@csh.rit.edu> Oh this RCU thing is a bit terse and a lot of code. Can you look if you can use the new revocable resource management API? It uses RCU underneath. https://lwn.net/Articles/1038523/ Yours, Linus Walleij
Hey Linus, On Wed, Oct 01, 2025 at 09:15:14AM +0200, Linus Walleij wrote: > Oh this RCU thing is a bit terse and a lot of code. > > Can you look if you can use the new revocable resource > management API? It uses RCU underneath. > https://lwn.net/Articles/1038523/ Yeah I'm very open to suggestions about how to do this nicer. I can't read that article, because I don't subscribe to LWN (Maybe I should though). Looks like it becomes free tomorrow, so I can take a look then. I did find and skim through this, which I believe is the implementation of the API you're talking about: https://lore.kernel.org/chrome-platform/20250923075302.591026-2-tzungbi@kernel.org/ Based on this, it seems like: * This uses sleepable RCU, which I don't think we can use in the IRQ callbacks since they don't allow any blocking (this is the main reason for the complexity) * We'd still need some sort of list primitive, because we could potentially have multiple workers being torn down at a time, so we need to know what all to revoke when the device is being torn down. Right now, I'm using the RCU lists API to keep track of this. My instinct is to use devm, but that also isn't technically safe for use in IRQ handlers. I obviously haven't played with the revocable resource management API, so maybe these limitations aren't as big of a deal as I think they are. With that said, I think now that I've found spinlocks work, I could use those to gate access to the list everywhere, and use the standard lists api rather than the RCU lists api. Obviously teardown of the workers would happen outside the spin lock critical section, guarded by a proper mutex. Let me know what you think... Thank you!
On Wed, Oct 1, 2025 at 5:07 PM Mary Strodl <mstrodl@csh.rit.edu> wrote: > With that said, I think now that I've found spinlocks work, I could use those > to gate access to the list everywhere, and use the standard lists api rather > than the RCU lists api. Obviously teardown of the workers would happen outside > the spin lock critical section, guarded by a proper mutex. Yeah RCU is for massive parallelism where you have a lot of (performance sensitive) readers and a few writers to the struct. If this isn't performance-sensitive and doesn't have a *lot* of simultaneous readers, try to use simpler locking mechanisms if you can. But if performance hampers, RCU is surely the way to go! Yours, Linus Walleij
Hi Mary, kernel test robot noticed the following build warnings: [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Mary-Strodl/gpio-mpsse-use-rcu-to-ensure-worker-is-torn-down/20250923-214710 base: https://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git gpio/for-next patch link: https://lore.kernel.org/r/20250923133304.273529-2-mstrodl%40csh.rit.edu patch subject: [PATCH v2 1/3] gpio: mpsse: use rcu to ensure worker is torn down config: i386-randconfig-141-20250929 (https://download.01.org/0day-ci/archive/20250929/202509290823.hreUi6Tp-lkp@intel.com/config) compiler: gcc-12 (Debian 12.4.0-5) 12.4.0 If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Reported-by: Dan Carpenter <dan.carpenter@linaro.org> | Closes: https://lore.kernel.org/r/202509290823.hreUi6Tp-lkp@intel.com/ New smatch warnings: drivers/gpio/gpio-mpsse.c:341 gpio_mpsse_poll() error: dereferencing freed memory 'worker' (line 342) drivers/gpio/gpio-mpsse.c:604 gpio_mpsse_disconnect() error: dereferencing freed memory 'worker' (line 605) vim +/worker +341 drivers/gpio/gpio-mpsse.c a14b0c5e3b0741 Mary Strodl 2025-09-23 304 static void gpio_mpsse_poll(struct work_struct *my_work) c46a74ff05c0ac Mary Strodl 2024-10-09 305 { c46a74ff05c0ac Mary Strodl 2024-10-09 306 unsigned long pin_mask, pin_states, flags; c46a74ff05c0ac Mary Strodl 2024-10-09 307 int irq_enabled, offset, err, value, fire_irq, c46a74ff05c0ac Mary Strodl 2024-10-09 308 irq, old_value[16], irq_type[16]; a14b0c5e3b0741 Mary Strodl 2025-09-23 309 struct mpsse_worker *worker; a14b0c5e3b0741 Mary Strodl 2025-09-23 310 struct mpsse_worker *my_worker = container_of(my_work, struct mpsse_worker, work); a14b0c5e3b0741 Mary Strodl 2025-09-23 311 struct mpsse_priv *priv = my_worker->priv; a14b0c5e3b0741 Mary Strodl 2025-09-23 312 struct list_head 
destructors = LIST_HEAD_INIT(destructors); c46a74ff05c0ac Mary Strodl 2024-10-09 313 c46a74ff05c0ac Mary Strodl 2024-10-09 314 for (offset = 0; offset < priv->gpio.ngpio; ++offset) c46a74ff05c0ac Mary Strodl 2024-10-09 315 old_value[offset] = -1; c46a74ff05c0ac Mary Strodl 2024-10-09 316 a14b0c5e3b0741 Mary Strodl 2025-09-23 317 /* a14b0c5e3b0741 Mary Strodl 2025-09-23 318 * We only want one worker. Workers race to acquire irq_race and tear a14b0c5e3b0741 Mary Strodl 2025-09-23 319 * down all other workers. This is a cond guard so that we don't deadlock a14b0c5e3b0741 Mary Strodl 2025-09-23 320 * trying to cancel a worker. a14b0c5e3b0741 Mary Strodl 2025-09-23 321 */ a14b0c5e3b0741 Mary Strodl 2025-09-23 322 scoped_cond_guard(mutex_try, ;, &priv->irq_race) { a14b0c5e3b0741 Mary Strodl 2025-09-23 323 scoped_guard(rcu) { a14b0c5e3b0741 Mary Strodl 2025-09-23 324 list_for_each_entry_rcu(worker, &priv->workers, list) { a14b0c5e3b0741 Mary Strodl 2025-09-23 325 /* Don't stop ourselves */ a14b0c5e3b0741 Mary Strodl 2025-09-23 326 if (worker == my_worker) a14b0c5e3b0741 Mary Strodl 2025-09-23 327 continue; a14b0c5e3b0741 Mary Strodl 2025-09-23 328 a14b0c5e3b0741 Mary Strodl 2025-09-23 329 scoped_guard(raw_spinlock_irqsave, &priv->irq_spin) a14b0c5e3b0741 Mary Strodl 2025-09-23 330 list_del_rcu(&worker->list); a14b0c5e3b0741 Mary Strodl 2025-09-23 331 a14b0c5e3b0741 Mary Strodl 2025-09-23 332 /* Give worker a chance to terminate itself */ a14b0c5e3b0741 Mary Strodl 2025-09-23 333 atomic_set(&worker->cancelled, 1); a14b0c5e3b0741 Mary Strodl 2025-09-23 334 /* Keep track of stuff to cancel */ a14b0c5e3b0741 Mary Strodl 2025-09-23 335 INIT_LIST_HEAD(&worker->destroy); a14b0c5e3b0741 Mary Strodl 2025-09-23 336 list_add(&worker->destroy, &destructors); a14b0c5e3b0741 Mary Strodl 2025-09-23 337 } a14b0c5e3b0741 Mary Strodl 2025-09-23 338 } a14b0c5e3b0741 Mary Strodl 2025-09-23 339 /* Make sure list consumers are finished before we tear down */ a14b0c5e3b0741 Mary Strodl 
2025-09-23 340 synchronize_rcu(); a14b0c5e3b0741 Mary Strodl 2025-09-23 @341 list_for_each_entry(worker, &destructors, destroy) a14b0c5e3b0741 Mary Strodl 2025-09-23 @342 gpio_mpsse_stop(worker); This needs to be list_for_each_entry_save() because gpio_mpsse_stop() frees the worker. Or kfree_rcu() inside an rcu lock or something. a14b0c5e3b0741 Mary Strodl 2025-09-23 343 } a14b0c5e3b0741 Mary Strodl 2025-09-23 344 a14b0c5e3b0741 Mary Strodl 2025-09-23 345 while ((irq_enabled = atomic_read(&priv->irq_enabled)) && -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Mon, Sep 29, 2025 at 11:46:24AM +0300, Dan Carpenter wrote: > a14b0c5e3b0741 Mary Strodl 2025-09-23 339 /* Make sure list consumers are finished before we tear down */ > a14b0c5e3b0741 Mary Strodl 2025-09-23 340 synchronize_rcu(); > a14b0c5e3b0741 Mary Strodl 2025-09-23 @341 list_for_each_entry(worker, &destructors, destroy) > a14b0c5e3b0741 Mary Strodl 2025-09-23 @342 gpio_mpsse_stop(worker); > > This needs to be list_for_each_entry_save() because gpio_mpsse_stop() s/save/safe/. #CantType regards, dan carpenter > frees the worker. Or kfree_rcu() inside an rcu lock or something. > > a14b0c5e3b0741 Mary Strodl 2025-09-23 343 } > a14b0c5e3b0741 Mary Strodl 2025-09-23 344 > a14b0c5e3b0741 Mary Strodl 2025-09-23 345 while ((irq_enabled = atomic_read(&priv->irq_enabled)) && > > -- > 0-DAY CI Kernel Test Service > https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.