[PATCH] jfs: snapshot syncpt for logsync comparisons

Cen Zhang posted 1 patch 1 month ago
fs/jfs/jfs_dmap.c   | 12 ++++++------
fs/jfs/jfs_imap.c   | 11 ++++++-----
fs/jfs/jfs_logmgr.c |  5 +++--
fs/jfs/jfs_logmgr.h | 10 ++++++++++
fs/jfs/jfs_txnmgr.c |  9 ++++++---
5 files changed, 31 insertions(+), 16 deletions(-)
[PATCH] jfs: snapshot syncpt for logsync comparisons
Posted by Cen Zhang 1 month ago
JFS uses log->syncpt as the base for circular log sequence number
comparisons. Allocation-map update paths compare two LSNs against that
base while deciding whether a metapage must inherit an older recovery
LSN or a newer commit LSN.

lmLogSync() updates log->syncpt while holding log->loglock. The pmap
update paths protect the metapage and logsynclist state with
log->synclock instead, so their plain logdiff() reads of log->syncpt can
race with the sync point update. In dbUpdatePMap(), the transaction LSN
difference is even computed before taking log->synclock and then compared
with a metapage difference computed later, allowing one inheritance
decision to use two different sync point bases.

A mixed-base comparison can miss an older transaction LSN inheritance.
After the transaction block is removed from the logsynclist, the
allocation-map metapage can then remain too young and allow a later sync
point to advance past allocation log records before the persistent map
page reaches home.

Snapshot log->syncpt once for each logsynclist ordering decision and use
that snapshot for all comparisons in the decision. Mark the concurrent
syncpt stores with WRITE_ONCE() to pair with the snapshots and document
that the field is intentionally accessed locklessly relative to
log->synclock.

Signed-off-by: Cen Zhang <zzzccc427@gmail.com>
---
 fs/jfs/jfs_dmap.c   | 12 ++++++------
 fs/jfs/jfs_imap.c   | 11 ++++++-----
 fs/jfs/jfs_logmgr.c |  5 +++--
 fs/jfs/jfs_logmgr.h | 10 ++++++++++
 fs/jfs/jfs_txnmgr.c |  9 ++++++---
 5 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 2abe8cc02ee6f..d0b31ebd30cad 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -446,7 +446,7 @@ dbUpdatePMap(struct inode *ipbmap,
 	struct dmap *dp;
 	struct metapage *mp;
 	struct jfs_log *log;
-	int lsn, difft, diffp;
+	int lsn, difft, diffp, syncpt;
 	unsigned long flags;
 
 	/* the blocks better be within the mapsize. */
@@ -458,10 +458,8 @@ dbUpdatePMap(struct inode *ipbmap,
 		return -EIO;
 	}
 
-	/* compute delta of transaction lsn from log syncpt */
 	lsn = tblk->lsn;
 	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
-	logdiff(difft, lsn, log);
 
 	/*
 	 * update the block state a dmap at a time.
@@ -553,9 +551,11 @@ dbUpdatePMap(struct inode *ipbmap,
 		lastlblkno = lblkno;
 
 		LOGSYNC_LOCK(log, flags);
+		syncpt = READ_ONCE(log->syncpt);
+		difft = logdiff_syncpt(lsn, syncpt, log->logsize);
 		if (mp->lsn != 0) {
 			/* inherit older/smaller lsn */
-			logdiff(diffp, mp->lsn, log);
+			diffp = logdiff_syncpt(mp->lsn, syncpt, log->logsize);
 			if (difft < diffp) {
 				mp->lsn = lsn;
 
@@ -564,8 +564,8 @@ dbUpdatePMap(struct inode *ipbmap,
 			}
 
 			/* inherit younger/larger clsn */
-			logdiff(difft, tblk->clsn, log);
-			logdiff(diffp, mp->clsn, log);
+			difft = logdiff_syncpt(tblk->clsn, syncpt, log->logsize);
+			diffp = logdiff_syncpt(mp->clsn, syncpt, log->logsize);
 			if (difft > diffp)
 				mp->clsn = tblk->clsn;
 		} else {
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 294a67327c735..62fe50a65aa50 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -2732,7 +2732,7 @@ diUpdatePMap(struct inode *ipimap,
 	struct inomap *imap;
 	u32 mask;
 	struct jfs_log *log;
-	int lsn, difft, diffp;
+	int lsn, difft, diffp, syncpt;
 	unsigned long flags;
 
 	imap = JFS_IP(ipimap)->i_imap;
@@ -2808,10 +2808,11 @@ diUpdatePMap(struct inode *ipimap,
 	lsn = tblk->lsn;
 	log = JFS_SBI(tblk->sb)->log;
 	LOGSYNC_LOCK(log, flags);
+	syncpt = READ_ONCE(log->syncpt);
 	if (mp->lsn != 0) {
 		/* inherit older/smaller lsn */
-		logdiff(difft, lsn, log);
-		logdiff(diffp, mp->lsn, log);
+		difft = logdiff_syncpt(lsn, syncpt, log->logsize);
+		diffp = logdiff_syncpt(mp->lsn, syncpt, log->logsize);
 		if (difft < diffp) {
 			mp->lsn = lsn;
 			/* move mp after tblock in logsync list */
@@ -2819,8 +2820,8 @@ diUpdatePMap(struct inode *ipimap,
 		}
 		/* inherit younger/larger clsn */
 		assert(mp->clsn);
-		logdiff(difft, tblk->clsn, log);
-		logdiff(diffp, mp->clsn, log);
+		difft = logdiff_syncpt(tblk->clsn, syncpt, log->logsize);
+		diffp = logdiff_syncpt(mp->clsn, syncpt, log->logsize);
 		if (difft > diffp)
 			mp->clsn = tblk->clsn;
 	} else {
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index ada00d5bc2146..5483de65d6d55 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -968,7 +968,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
 		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
 		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
 
-		log->syncpt = log->sync;
+		WRITE_ONCE(log->syncpt, log->sync);
 	} else
 		lsn = log->lsn;
 
@@ -1002,7 +1002,8 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
 		/* log->state = LOGWRAP; */
 
 		/* reset sync point computation */
-		log->syncpt = log->sync = lsn;
+		log->sync = lsn;
+		WRITE_ONCE(log->syncpt, lsn);
 		log->nextsync = delta;
 	} else
 		/* next syncpt trigger = written + more */
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 8b8994e48cd08..7f534ea9605b6 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -487,6 +487,16 @@ struct logsyncblk {
 		diff += (log)->logsize;\
 }
 
+static inline int logdiff_syncpt(int lsn, int syncpt, int logsize)
+{
+	int diff = lsn - syncpt;
+
+	if (diff < 0)
+		diff += logsize;
+
+	return diff;
+}
+
 extern int lmLogOpen(struct super_block *sb);
 extern int lmLogClose(struct super_block *sb);
 extern int lmLogShutdown(struct jfs_log * log);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index c16578af3a77e..db2f677742d1f 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -905,7 +905,7 @@ static void txUnlock(struct tblock * tblk)
 	lid_t lid, next, llid, k;
 	struct metapage *mp;
 	struct jfs_log *log;
-	int difft, diffp;
+	int difft, diffp, syncpt;
 	unsigned long flags;
 
 	jfs_info("txUnlock: tblk = 0x%p", tblk);
@@ -935,8 +935,11 @@ static void txUnlock(struct tblock * tblk)
 			/* inherit younger/larger clsn */
 			LOGSYNC_LOCK(log, flags);
 			if (mp->clsn) {
-				logdiff(difft, tblk->clsn, log);
-				logdiff(diffp, mp->clsn, log);
+				syncpt = READ_ONCE(log->syncpt);
+				difft = logdiff_syncpt(tblk->clsn, syncpt,
+						       log->logsize);
+				diffp = logdiff_syncpt(mp->clsn, syncpt,
+						       log->logsize);
 				if (difft > diffp)
 					mp->clsn = tblk->clsn;
 			} else