drivers/ntb/ntb_transport.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-)
Since the CPU selected by schedule_work() is not deterministic, multiple
link_event callbacks may be executed at the same time. For example, the
peer's link may come up and then go down again before the local link_work
has completed. If link_cleanup is then queued on another CPU's workqueue,
link_work and link_cleanup may run concurrently. Add a mutex to prevent
them from being executed concurrently.
Signed-off-by: fuyuanli <fuyuanli@didiglobal.com>
---
drivers/ntb/ntb_transport.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 4f775c3e218f..902968e24c7a 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -59,6 +59,7 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
+#include <linux/mutex.h>
#include "linux/ntb.h"
#include "linux/ntb_transport.h"
@@ -241,6 +242,9 @@ struct ntb_transport_ctx {
struct work_struct link_cleanup;
struct dentry *debugfs_node_dir;
+
+ /* Ensure the link event work items are executed serially */
+ struct mutex link_event_lock;
};
enum {
@@ -1024,7 +1028,9 @@ static void ntb_transport_link_cleanup_work(struct work_struct *work)
struct ntb_transport_ctx *nt =
container_of(work, struct ntb_transport_ctx, link_cleanup);
+ mutex_lock(&nt->link_event_lock);
ntb_transport_link_cleanup(nt);
+ mutex_unlock(&nt->link_event_lock);
}
static void ntb_transport_event_callback(void *data)
@@ -1047,6 +1053,8 @@ static void ntb_transport_link_work(struct work_struct *work)
u32 val;
int rc = 0, i, spad;
+ mutex_lock(&nt->link_event_lock);
+
/* send the local info, in the opposite order of the way we read it */
if (nt->use_msi) {
@@ -1125,6 +1133,7 @@ static void ntb_transport_link_work(struct work_struct *work)
schedule_delayed_work(&qp->link_work, 0);
}
+ mutex_unlock(&nt->link_event_lock);
return;
out1:
@@ -1132,10 +1141,13 @@ static void ntb_transport_link_work(struct work_struct *work)
ntb_free_mw(nt, i);
/* if there's an actual failure, we should just bail */
- if (rc < 0)
+ if (rc < 0) {
+ mutex_unlock(&nt->link_event_lock);
return;
+ }
out:
+ mutex_unlock(&nt->link_event_lock);
if (ntb_link_is_up(ndev, NULL, NULL) == 1)
schedule_delayed_work(&nt->link_work,
msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
--
2.34.1
On 8/22/25 7:41 AM, fuyuanli wrote: > Since the CPU selected by schedule_work is uncertain, multiple link_event > callbacks may be executed at same time. For example, after peer's link > is up, it is down quickly before local link_work completed. If link_cleanup > is added to the workqueue of another CPU, then link_work and link_cleanup > may be executed at the same time. So add a mutex to prevent them from being > executed concurrently. > > Signed-off-by: fuyuanli <fuyuanli@didiglobal.com> > --- > drivers/ntb/ntb_transport.c | 14 +++++++++++++- > 1 file changed, 13 insertions(+), 1 deletion(-) > > diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c > index 4f775c3e218f..902968e24c7a 100644 > --- a/drivers/ntb/ntb_transport.c > +++ b/drivers/ntb/ntb_transport.c > @@ -59,6 +59,7 @@ > #include <linux/slab.h> > #include <linux/types.h> > #include <linux/uaccess.h> > +#include <linux/mutex.h> > #include "linux/ntb.h" > #include "linux/ntb_transport.h" > > @@ -241,6 +242,9 @@ struct ntb_transport_ctx { > struct work_struct link_cleanup; > > struct dentry *debugfs_node_dir; > + > + /* Make sure workq of link event be executed serially */ > + struct mutex link_event_lock; > }; > > enum { > @@ -1024,7 +1028,9 @@ static void ntb_transport_link_cleanup_work(struct work_struct *work) > struct ntb_transport_ctx *nt = > container_of(work, struct ntb_transport_ctx, link_cleanup); > > + mutex_lock(&nt->link_event_lock); Can you please use guard() instead? Should produce less code and not worry about calling all the unlocks. 
DJ > ntb_transport_link_cleanup(nt); > + mutex_unlock(&nt->link_event_lock); > } > > static void ntb_transport_event_callback(void *data) > @@ -1047,6 +1053,8 @@ static void ntb_transport_link_work(struct work_struct *work) > u32 val; > int rc = 0, i, spad; > > + mutex_lock(&nt->link_event_lock); > + > /* send the local info, in the opposite order of the way we read it */ > > if (nt->use_msi) { > @@ -1125,6 +1133,7 @@ static void ntb_transport_link_work(struct work_struct *work) > schedule_delayed_work(&qp->link_work, 0); > } > > + mutex_unlock(&nt->link_event_lock); > return; > > out1: > @@ -1132,10 +1141,13 @@ static void ntb_transport_link_work(struct work_struct *work) > ntb_free_mw(nt, i); > > /* if there's an actual failure, we should just bail */ > - if (rc < 0) > + if (rc < 0) { > + mutex_unlock(&nt->link_event_lock); > return; > + } > > out: > + mutex_unlock(&nt->link_event_lock); > if (ntb_link_is_up(ndev, NULL, NULL) == 1) > schedule_delayed_work(&nt->link_work, > msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
© 2016 - 2025 Red Hat, Inc.