fs/jfs/jfs_txnmgr.c | 117 ++++++++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 54 deletions(-)
In txLock()'s allocateLock path, the newly allocated tlock is
published (mp->lid = lid) only after TXN_UNLOCK(), creating a window
where a concurrent txLock() call on the same metapage reads a stale
mp->lid of 0, enters allocateLock itself, and creates a second tlock
for the same page.
This leads to two transactions holding separate tlocks for the same
metapage, causing journal log corruption: both transactions log
conflicting changes to the same page, and the second tlock's mp->lid
assignment silently orphans the first tlock.
The race is triggered in practice when two concurrent truncate
operations (via jfs_setattr and ftruncate) call diWrite() for
different inodes whose disk inodes share the same ipimap metapage.
Both call txLock(tid, ipimap, mp, tlckINODE|tlckENTRY) concurrently.
Fix this by completing all tlock initialization -- including flag,
type, linelock setup, page binding (mp->lid / xtlid), and
transaction/anonymous list enqueue -- while still holding TXN_LOCK,
before any concurrent txLock() caller can observe the tlock.
Only metapage_nohomeok() is deferred until after TXN_UNLOCK(), as it
may block on folio_lock() and folio_wait_writeback() and therefore
cannot execute under the TXN_LOCK spinlock. By that point the tlock
is fully initialized and published, so a concurrent adopter will see
a complete tlock.
For anonymous transactions, the list_add_tail() onto
TxAnchor.anon_list is now performed under the already-held TXN_LOCK
instead of dropping and re-acquiring the lock, which also eliminates
a potential adoption-before-enqueue window.
Signed-off-by: Cen Zhang <zzzccc427@gmail.com>
---
fs/jfs/jfs_txnmgr.c | 117 ++++++++++++++++++++++++--------------------
1 file changed, 63 insertions(+), 54 deletions(-)
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 51be07337c7a..3be6af49a770 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -692,29 +692,10 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
*/
tlck->tid = tid;
- TXN_UNLOCK();
-
/* mark tlock for meta-data page */
- if (mp->xflag & COMMIT_PAGE) {
-
+ if (mp->xflag & COMMIT_PAGE)
tlck->flag = tlckPAGELOCK;
-
- /* mark the page dirty and nohomeok */
- metapage_nohomeok(mp);
-
- jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
- mp, mp->nohomeok, tid, tlck);
-
- /* if anonymous transaction, and buffer is on the group
- * commit synclist, mark inode to show this. This will
- * prevent the buffer from being marked nohomeok for too
- * long a time.
- */
- if ((tid == 0) && mp->lsn)
- set_cflag(COMMIT_Synclist, ip);
- }
- /* mark tlock for in-memory inode */
- else
+ else /* mark tlock for in-memory inode */
tlck->flag = tlckINODELOCK;
if (S_ISDIR(ip->i_mode))
@@ -725,39 +706,6 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
/* bind the tlock and the page */
tlck->ip = ip;
tlck->mp = mp;
- if (dir_xtree)
- jfs_ip->xtlid = lid;
- else
- mp->lid = lid;
-
- /*
- * enqueue transaction lock to transaction/inode
- */
- /* insert the tlock at tail of transaction tlock list */
- if (tid) {
- tblk = tid_to_tblock(tid);
- if (tblk->next)
- lid_to_tlock(tblk->last)->next = lid;
- else
- tblk->next = lid;
- tlck->next = 0;
- tblk->last = lid;
- }
- /* anonymous transaction:
- * insert the tlock at head of inode anonymous tlock list
- */
- else {
- tlck->next = jfs_ip->atlhead;
- jfs_ip->atlhead = lid;
- if (tlck->next == 0) {
- /* This inode's first anonymous transaction */
- jfs_ip->atltail = lid;
- TXN_LOCK();
- list_add_tail(&jfs_ip->anon_inode_list,
- &TxAnchor.anon_list);
- TXN_UNLOCK();
- }
- }
/* initialize type dependent area for linelock */
linelock = (struct linelock *) & tlck->lock;
@@ -807,6 +755,67 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
jfs_err("UFO tlock:0x%p", tlck);
}
+ /*
+ * Publish the tlock (set mp->lid or jfs_ip->xtlid) and enqueue it
+ * while still holding TXN_LOCK. A concurrent txLock() on the same
+ * metapage must see the non-zero lid here; otherwise it will take
+ * the allocateLock path and allocate a duplicate tlock, corrupting
+ * the transaction journal.
+ */
+ if (dir_xtree)
+ jfs_ip->xtlid = lid;
+ else
+ mp->lid = lid;
+
+ /*
+ * enqueue transaction lock to transaction/inode
+ */
+ /* insert the tlock at tail of transaction tlock list */
+ if (tid) {
+ tblk = tid_to_tblock(tid);
+ if (tblk->next)
+ lid_to_tlock(tblk->last)->next = lid;
+ else
+ tblk->next = lid;
+ tlck->next = 0;
+ tblk->last = lid;
+ }
+ /* anonymous transaction:
+ * insert the tlock at head of inode anonymous tlock list
+ */
+ else {
+ tlck->next = jfs_ip->atlhead;
+ jfs_ip->atlhead = lid;
+ if (tlck->next == 0) {
+ /* This inode's first anonymous transaction */
+ jfs_ip->atltail = lid;
+ list_add_tail(&jfs_ip->anon_inode_list,
+ &TxAnchor.anon_list);
+ }
+ }
+
+ TXN_UNLOCK();
+
+ /*
+ * metapage_nohomeok() may block (folio_lock, folio_wait_writeback),
+ * so it must be called after dropping TXN_LOCK. The tlock is fully
+ * initialized and visible at this point.
+ */
+ if (mp->xflag & COMMIT_PAGE) {
+ metapage_nohomeok(mp);
+
+ jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
+ mp, mp->nohomeok, tid, tlck);
+
+ /* if anonymous transaction, and buffer is on the group
+ * commit synclist, mark inode to show this. This will
+ * prevent the buffer from being marked nohomeok for too
+ * long a time.
+ */
+ if ((tid == 0) && mp->lsn)
+ set_cflag(COMMIT_Synclist, ip);
+ }
+
/*
* update tlock vector
*/
--
2.34.1
© 2016 - 2026 Red Hat, Inc.