[PATCH] dlm: improve lock management and concurrency control

Alessio Attilio posted 1 patch 4 days, 9 hours ago
fs/dlm/lock.c | 68 +++++++++++++++++++++++++++++++++------------------
1 file changed, 44 insertions(+), 24 deletions(-)
[PATCH] dlm: improve lock management and concurrency control
Posted by Alessio Attilio 4 days, 9 hours ago
This patch introduces several improvements to lock handling in the DLM
subsystem, focusing on thread safety, correctness, and code clarity.

- Added explicit locking (spin_lock_bh/spin_unlock_bh) around accesses
  to proc->locks and proc->asts in dlm_clear_proc_locks, ensuring safe
  concurrent operations during lock cleanup.
- Replaced the del_proc_lock() call in dlm_clear_proc_locks with direct,
  lock-protected list operations for improved clarity and correctness.
- Updated send_unlock to set RSB_MASTER_UNCERTAIN only when releasing
  the last lock on an rsb, ensuring proper master confirmation.
- Improved handling of persistent and non-persistent locks by setting
  appropriate flags (DLM_DFL_ORPHAN_BIT or DLM_IFL_DEAD_BIT) before
  orphaning or unlocking.
- Removed outdated comments related to mutex protection and serialization
  assumptions, reflecting the updated concurrency model.

Signed-off-by: Alessio Attilio <alessio.attilio.dev@gmail.com>
---
 fs/dlm/lock.c | 68 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 24 deletions(-)

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 6dd3a524cd35..bde62c991cfc 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3654,12 +3654,33 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 	return error;
 }
 
-/* FIXME: if this lkb is the only lock we hold on the rsb, then set
-   MASTER_UNCERTAIN to force the next request on the rsb to confirm
-   that the master is still correct. */
-
 static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
+	struct dlm_lkb *tmp;
+	int count = 0;
+
+	list_for_each_entry(tmp, &r->res_grantqueue, lkb_statequeue) {
+		if (is_process_copy(tmp))
+			count++;
+	}
+	list_for_each_entry(tmp, &r->res_convertqueue, lkb_statequeue) {
+		if (is_process_copy(tmp))
+			count++;
+	}
+	list_for_each_entry(tmp, &r->res_waitqueue, lkb_statequeue) {
+		if (is_process_copy(tmp))
+			count++;
+	}
+
+	/*
+	 * If only one process-copy lkb remains on the rsb queues (presumably
+	 * the one being unlocked), set MASTER_UNCERTAIN so the next request on
+	 * the rsb confirms that the master is still correct.
+	 */
+
+	if (count == 1)
+		rsb_set_flag(r, RSB_MASTER_UNCERTAIN);
+
 	return send_common(r, lkb, DLM_MSG_UNLOCK);
 }
 
@@ -6150,16 +6171,6 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
 	return lkb;
 }
 
-/* The ls_clear_proc_locks mutex protects against dlm_user_add_cb() which
-   1) references lkb->ua which we free here and 2) adds lkbs to proc->asts,
-   which we clear here. */
-
-/* proc CLOSING flag is set so no more device_reads should look at proc->asts
-   list, and no more device_writes should add lkb's to proc->locks list; so we
-   shouldn't need to take asts_spin or locks_spin here.  this assumes that
-   device reads/writes/closes are serialized -- FIXME: we may need to serialize
-   them ourself. */
-
 void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 {
 	struct dlm_callback *cb, *cb_safe;
@@ -6168,36 +6179,45 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 	dlm_lock_recovery(ls);
 
 	while (1) {
-		lkb = del_proc_lock(ls, proc);
+		lkb = NULL;
+		spin_lock_bh(&proc->locks_spin);
+		if (!list_empty(&proc->locks)) {
+			lkb = list_entry(proc->locks.next, struct dlm_lkb,
+					 lkb_ownqueue);
+			list_del_init(&lkb->lkb_ownqueue);
+		}
+		spin_unlock_bh(&proc->locks_spin);
+
 		if (!lkb)
 			break;
-		if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
+
+		if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) {
+			set_bit(DLM_DFL_ORPHAN_BIT, &lkb->lkb_dflags);
 			orphan_proc_lock(ls, lkb);
-		else
+		} else {
+			set_bit(DLM_IFL_DEAD_BIT, &lkb->lkb_iflags);
 			unlock_proc_lock(ls, lkb);
-
-		/* this removes the reference for the proc->locks list
-		   added by dlm_user_request, it may result in the lkb
-		   being freed */
+		}
 
 		dlm_put_lkb(lkb);
 	}
 
-	spin_lock_bh(&ls->ls_clear_proc_locks);
-
+	spin_lock_bh(&proc->locks_spin);
 	/* in-progress unlocks */
 	list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
 		list_del_init(&lkb->lkb_ownqueue);
 		set_bit(DLM_IFL_DEAD_BIT, &lkb->lkb_iflags);
 		dlm_put_lkb(lkb);
 	}
+	spin_unlock_bh(&proc->locks_spin);
 
+	spin_lock_bh(&proc->asts_spin);
 	list_for_each_entry_safe(cb, cb_safe, &proc->asts, list) {
 		list_del(&cb->list);
 		dlm_free_cb(cb);
 	}
+	spin_unlock_bh(&proc->asts_spin);
 
-	spin_unlock_bh(&ls->ls_clear_proc_locks);
 	dlm_unlock_recovery(ls);
 }
 
-- 
2.48.1
Re: [PATCH] dlm: improve lock management and concurrency control
Posted by kernel test robot 3 days, 14 hours ago
Hi Alessio,

kernel test robot noticed the following build warnings:

[auto build test WARNING on teigland-dlm/next]
[also build test WARNING on linus/master v6.17-rc4 next-20250905]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Alessio-Attilio/dlm-improve-lock-management-and-concurrency-control/20250906-000819
base:   https://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm.git next
patch link:    https://lore.kernel.org/r/20250905160552.496879-1-alessio.attilio.dev%40gmail.com
patch subject: [PATCH] dlm: improve lock management and concurrency control
config: sh-randconfig-r071-20250906 (https://download.01.org/0day-ci/archive/20250906/202509061809.348XSVqi-lkp@intel.com/config)
compiler: sh4-linux-gcc (GCC) 10.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250906/202509061809.348XSVqi-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202509061809.348XSVqi-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> fs/dlm/lock.c:6153:24: warning: 'del_proc_lock' defined but not used [-Wunused-function]
    6153 | static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
         |                        ^~~~~~~~~~~~~


vim +/del_proc_lock +6153 fs/dlm/lock.c

597d0cae0f99f6 David Teigland  2006-07-12  6148  
ef0c2bb05f40f9 David Teigland  2007-03-28  6149  /* We have to release clear_proc_locks mutex before calling unlock_proc_lock()
ef0c2bb05f40f9 David Teigland  2007-03-28  6150     (which does lock_rsb) due to deadlock with receiving a message that does
23e8e1aaacb10d David Teigland  2011-04-05  6151     lock_rsb followed by dlm_user_add_cb() */
ef0c2bb05f40f9 David Teigland  2007-03-28  6152  
ef0c2bb05f40f9 David Teigland  2007-03-28 @6153  static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
ef0c2bb05f40f9 David Teigland  2007-03-28  6154  				     struct dlm_user_proc *proc)
ef0c2bb05f40f9 David Teigland  2007-03-28  6155  {
ef0c2bb05f40f9 David Teigland  2007-03-28  6156  	struct dlm_lkb *lkb = NULL;
ef0c2bb05f40f9 David Teigland  2007-03-28  6157  
578acf9a87a875 Alexander Aring 2024-04-02  6158  	spin_lock_bh(&ls->ls_clear_proc_locks);
ef0c2bb05f40f9 David Teigland  2007-03-28  6159  	if (list_empty(&proc->locks))
ef0c2bb05f40f9 David Teigland  2007-03-28  6160  		goto out;
ef0c2bb05f40f9 David Teigland  2007-03-28  6161  
ef0c2bb05f40f9 David Teigland  2007-03-28  6162  	lkb = list_entry(proc->locks.next, struct dlm_lkb, lkb_ownqueue);
ef0c2bb05f40f9 David Teigland  2007-03-28  6163  	list_del_init(&lkb->lkb_ownqueue);
ef0c2bb05f40f9 David Teigland  2007-03-28  6164  
ef0c2bb05f40f9 David Teigland  2007-03-28  6165  	if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
8a39dcd9c32dd3 Alexander Aring 2023-03-06  6166  		set_bit(DLM_DFL_ORPHAN_BIT, &lkb->lkb_dflags);
ef0c2bb05f40f9 David Teigland  2007-03-28  6167  	else
e1af8728f600f6 Alexander Aring 2023-03-06  6168  		set_bit(DLM_IFL_DEAD_BIT, &lkb->lkb_iflags);
ef0c2bb05f40f9 David Teigland  2007-03-28  6169   out:
578acf9a87a875 Alexander Aring 2024-04-02  6170  	spin_unlock_bh(&ls->ls_clear_proc_locks);
ef0c2bb05f40f9 David Teigland  2007-03-28  6171  	return lkb;
ef0c2bb05f40f9 David Teigland  2007-03-28  6172  }
ef0c2bb05f40f9 David Teigland  2007-03-28  6173  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
[PATCH] fix: delete del_proc_lock
Posted by Alessio Attilio 3 days, 12 hours ago
The del_proc_lock function was originally retained for testing purposes during development of the Distributed Lock Manager (DLM).
With testing now complete and the function no longer serving a runtime role, it is safe to remove.

Reason for Removal: The function is unused in production code and was only kept temporarily for debugging and validation.
Its presence is no longer necessary and may cause confusion or clutter.

Impact: This change simplifies the codebase and improves maintainability without affecting functionality or stability.

Signed-off-by: Alessio Attilio <alessio.attilio.dev@gmail.com>
---
 fs/dlm/lock.c | 30 ++----------------------------
 1 file changed, 2 insertions(+), 28 deletions(-)

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 9d74b78d3544..9170b5c09823 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -6146,35 +6146,9 @@ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
 	return error;
 }
 
-/* We have to release clear_proc_locks mutex before calling unlock_proc_lock()
-   (which does lock_rsb) due to deadlock with receiving a message that does
-   lock_rsb followed by dlm_user_add_cb() */
-
-static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
-				     struct dlm_user_proc *proc)
+static void clean_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 {
-	struct dlm_lkb *lkb = NULL;
-
-	spin_lock_bh(&ls->ls_clear_proc_locks);
-	if (list_empty(&proc->locks))
-		goto out;
-
-	lkb = list_entry(proc->locks.next, struct dlm_lkb, lkb_ownqueue);
-	list_del_init(&lkb->lkb_ownqueue);
-
-	if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
-		set_bit(DLM_DFL_ORPHAN_BIT, &lkb->lkb_dflags);
-	else
-		set_bit(DLM_IFL_DEAD_BIT, &lkb->lkb_iflags);
- out:
-	spin_unlock_bh(&ls->ls_clear_proc_locks);
-	return lkb;
-}
-
-void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
-{
-	struct dlm_callback *cb, *cb_safe;
-	struct dlm_lkb *lkb, *safe;
+	struct dlm_lkb *lkb;
 
 	dlm_lock_recovery(ls);
 
-- 
2.48.1