[RFC PATCH 3/4] ext4: fast_commit: write TLVs into DAX ByteLog

Li Chen posted 4 patches 1 month, 1 week ago
[RFC PATCH 3/4] ext4: fast_commit: write TLVs into DAX ByteLog
Posted by Li Chen 1 month, 1 week ago
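When dax_fc_bytelog is enabled, write the fast commit data TLVs
(ADD_RANGE, DEL_RANGE, LINK, UNLINK, CREAT and INODE) directly into the
DAX-mapped ByteLog ring instead of the fast commit blocks.

Keep traditional TLV writes confined to the reserved FC block and emit an
anchor TLV (EXT4_FC_TAG_DAX_BYTELOG_ANCHOR) there to describe the ByteLog
window. If a TLV does not fit in the reserved block, or the ByteLog append
returns -ENOSPC, mark the filesystem ineligible for fast commit with the
new EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW reason.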

Signed-off-by: Li Chen <me@linux.beauty>
---
 fs/ext4/fast_commit.c         | 124 +++++++++++++++++++++++++++++++++-
 fs/ext4/fast_commit.h         |  13 ++++
 fs/ext4/fast_commit_bytelog.c |  20 ++++++
 fs/ext4/fast_commit_bytelog.h |   5 ++
 4 files changed, 159 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 64c0c4ba58b0..2f7b7ea29df2 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -723,6 +723,12 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
 	 * leaving enough space for a PAD tlv.
 	 */
 	remaining = bsize - EXT4_FC_TAG_BASE_LEN - off;
+	if (ext4_fc_bytelog_active(sbi) && len > remaining) {
+		ext4_fc_mark_ineligible(sb,
+					EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW,
+					NULL);
+		return NULL;
+	}
 	if (len <= remaining) {
 		sbi->s_fc_bytes += len;
 		return dst;
@@ -806,6 +812,31 @@ static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
 	struct ext4_fc_tl tl;
 	u8 *dst;
 
+	if (ext4_fc_bytelog_active(EXT4_SB(sb)) &&
+	    (tag == EXT4_FC_TAG_ADD_RANGE || tag == EXT4_FC_TAG_DEL_RANGE ||
+	     tag == EXT4_FC_TAG_LINK || tag == EXT4_FC_TAG_UNLINK ||
+	     tag == EXT4_FC_TAG_CREAT || tag == EXT4_FC_TAG_INODE)) {
+		struct ext4_fc_bytelog_vec vecs[2];
+		int ret;
+
+		tl.fc_tag = cpu_to_le16(tag);
+		tl.fc_len = cpu_to_le16(len);
+		vecs[0].base = &tl;
+		vecs[0].len = sizeof(tl);
+		vecs[1].base = val;
+		vecs[1].len = len;
+
+		ret = ext4_fc_bytelog_append_vec(sb, tag, vecs,
+						 ARRAY_SIZE(vecs));
+		if (!ret)
+			return true;
+		if (ret == -ENOSPC)
+			ext4_fc_mark_ineligible(sb,
+						EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW,
+						NULL);
+		return false;
+	}
+
 	dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc);
 	if (!dst)
 		return false;
@@ -819,6 +850,17 @@ static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
 	return true;
 }
 
+static bool ext4_fc_add_bytelog_anchor_tlv(struct super_block *sb,
+					   struct ext4_fc_bytelog_anchor *anchor,
+					   u32 *crc)
+{
+	struct ext4_fc_bytelog_entry entry; /* NOTE(review): padding, if any, not zeroed */
+
+	ext4_fc_bytelog_anchor_to_disk(&entry, anchor); /* CPU -> little-endian */
+	return ext4_fc_add_tlv(sb, EXT4_FC_TAG_DAX_BYTELOG_ANCHOR,
+			       sizeof(entry), (u8 *)&entry, crc); /* false if not added */
+}
+
 /* Same as above, but adds dentry tlv. */
 static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
 				   struct ext4_fc_dentry_update *fc_dentry)
@@ -826,9 +868,40 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
 	struct ext4_fc_dentry_info fcd;
 	struct ext4_fc_tl tl;
 	int dlen = fc_dentry->fcd_name.name.len;
-	u8 *dst = ext4_fc_reserve_space(sb,
-			EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc);
+	u8 *dst;
+
+	if (ext4_fc_bytelog_active(EXT4_SB(sb)) &&
+	    (fc_dentry->fcd_op == EXT4_FC_TAG_LINK ||
+	     fc_dentry->fcd_op == EXT4_FC_TAG_UNLINK ||
+	     fc_dentry->fcd_op == EXT4_FC_TAG_CREAT)) {
+		struct ext4_fc_bytelog_vec vecs[3];
+		int ret;
+
+		fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent);
+		fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
+		tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
+		tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
+
+		vecs[0].base = &tl;
+		vecs[0].len = sizeof(tl);
+		vecs[1].base = &fcd;
+		vecs[1].len = sizeof(fcd);
+		vecs[2].base = fc_dentry->fcd_name.name.name;
+		vecs[2].len = dlen;
+
+		ret = ext4_fc_bytelog_append_vec(sb, fc_dentry->fcd_op, vecs,
+						 ARRAY_SIZE(vecs));
+		if (!ret)
+			return true;
+		if (ret == -ENOSPC)
+			ext4_fc_mark_ineligible(sb,
+						EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW,
+						NULL);
+		return false;
+	}
 
+	dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(fcd) +
+				    dlen, crc);
 	if (!dst)
 		return false;
 
@@ -872,6 +945,25 @@ static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
 	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
 
+	if (ext4_fc_bytelog_active(EXT4_SB(inode->i_sb))) {
+		struct ext4_fc_bytelog_vec vecs[3];
+
+		vecs[0].base = &tl;
+		vecs[0].len = sizeof(tl);
+		vecs[1].base = &fc_inode.fc_ino;
+		vecs[1].len = sizeof(fc_inode.fc_ino);
+		vecs[2].base = ext4_raw_inode(&iloc);
+		vecs[2].len = inode_len;
+
+		ret = ext4_fc_bytelog_append_vec(inode->i_sb, EXT4_FC_TAG_INODE,
+						 vecs, ARRAY_SIZE(vecs));
+		if (ret == -ENOSPC)
+			ext4_fc_mark_ineligible(inode->i_sb,
+						EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW,
+						NULL);
+		goto err;
+	}
+
 	ret = -ECANCELED;
 	dst = ext4_fc_reserve_space(inode->i_sb,
 		EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc);
@@ -1147,6 +1239,8 @@ static int ext4_fc_perform_commit(journal_t *journal)
 	}
 
 	/* Step 6.2: Now write all the dentry updates. */
+	if (ext4_fc_bytelog_active(sbi))
+		ext4_fc_bytelog_begin_commit(sb);
 	ret = ext4_fc_commit_dentry_updates(journal, &crc);
 	if (ret)
 		goto out;
@@ -1164,6 +1258,22 @@ static int ext4_fc_perform_commit(journal_t *journal)
 		if (ret)
 			goto out;
 	}
+
+	if (ext4_fc_bytelog_active(sbi)) {
+		struct ext4_fc_bytelog_anchor anchor;
+
+		ret = ext4_fc_bytelog_end_commit(sb);
+		if (ret)
+			goto out;
+		if (sbi->s_fc_bytelog.seq) {
+			ext4_fc_bytelog_build_anchor(sb, &anchor,
+						     sbi->s_journal->j_running_transaction->t_tid);
+			if (!ext4_fc_add_bytelog_anchor_tlv(sb, &anchor, &crc)) {
+				ret = -ENOSPC;
+				goto out;
+			}
+		}
+	}
 	/* Step 6.4: Finally write tail tag to conclude this fast commit. */
 	ret = ext4_fc_write_tail(sb, crc);
 
@@ -1262,6 +1372,12 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
 	else
 		journal_ioprio = EXT4_DEF_JOURNAL_IOPRIO;
 	set_task_ioprio(current, journal_ioprio);
+
+	if (ext4_fc_bytelog_active(sbi)) {
+		journal->j_fc_off = 0;
+		sbi->s_fc_bytes = 0;
+	}
+
 	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
 	ret = ext4_fc_perform_commit(journal);
 	if (ret < 0) {
@@ -1367,8 +1483,9 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
 		ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
 	}
 
-	if (full)
+	if (full || ext4_fc_bytelog_active(sbi))
 		sbi->s_fc_bytes = 0;
+	ext4_fc_bytelog_reset(sb, full);
 	ext4_fc_unlock(sb, alloc_ctx);
 	trace_ext4_fc_stats(sb);
 }
@@ -2315,6 +2432,7 @@ static const char * const fc_ineligible_reasons[] = {
 	[EXT4_FC_REASON_FALLOC_RANGE] = "Falloc range op",
 	[EXT4_FC_REASON_INODE_JOURNAL_DATA] = "Data journalling",
 	[EXT4_FC_REASON_ENCRYPTED_FILENAME] = "Encrypted filename",
+	[EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW] = "ByteLog TLV overflow",
 	[EXT4_FC_REASON_MIGRATE] = "Inode format migration",
 	[EXT4_FC_REASON_VERITY] = "fs-verity enable",
 	[EXT4_FC_REASON_MOVE_EXT] = "Move extents",
diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
index 2f77a37fb101..fb51e19b9778 100644
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -18,6 +18,7 @@
 #define EXT4_FC_TAG_PAD			0x0007
 #define EXT4_FC_TAG_TAIL		0x0008
 #define EXT4_FC_TAG_HEAD		0x0009
+#define EXT4_FC_TAG_DAX_BYTELOG_ANCHOR	0x000a
 
 #define EXT4_FC_SUPPORTED_FEATURES	0x0
 
@@ -70,6 +71,15 @@ struct ext4_fc_tail {
 	__le32 fc_crc;
 };
 
+/* Value for EXT4_FC_TAG_DAX_BYTELOG_ANCHOR; u64s first: no implicit padding. */
+struct ext4_fc_bytelog_entry {
+	__le64 fc_head;		/* anchor->head */
+	__le64 fc_tail;		/* anchor->tail */
+	__le64 fc_seq;		/* anchor->seq */
+	__le32 fc_tid;		/* anchor->tid */
+	__le32 fc_crc;		/* anchor->crc */
+};
+
 /* Tag base length */
 #define EXT4_FC_TAG_BASE_LEN (sizeof(struct ext4_fc_tl))
 
@@ -97,6 +107,7 @@ enum {
 	EXT4_FC_REASON_FALLOC_RANGE,
 	EXT4_FC_REASON_INODE_JOURNAL_DATA,
 	EXT4_FC_REASON_ENCRYPTED_FILENAME,
+	EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW,
 	EXT4_FC_REASON_MIGRATE,
 	EXT4_FC_REASON_VERITY,
 	EXT4_FC_REASON_MOVE_EXT,
@@ -181,6 +192,8 @@ static inline const char *tag2str(__u16 tag)
 		return "TAIL";
 	case EXT4_FC_TAG_HEAD:
 		return "HEAD";
+	case EXT4_FC_TAG_DAX_BYTELOG_ANCHOR:
+		return "BYTELOG_ANCHOR";
 	default:
 		return "ERROR";
 	}
diff --git a/fs/ext4/fast_commit_bytelog.c b/fs/ext4/fast_commit_bytelog.c
index 64ba3edddbcb..77ac1d9ef031 100644
--- a/fs/ext4/fast_commit_bytelog.c
+++ b/fs/ext4/fast_commit_bytelog.c
@@ -455,6 +455,26 @@ void ext4_fc_bytelog_release(struct super_block *sb)
 	memset(&sbi->s_fc_bytelog, 0, sizeof(sbi->s_fc_bytelog));
 }
 
+void ext4_fc_bytelog_anchor_to_disk(struct ext4_fc_bytelog_entry *dst,
+				    const struct ext4_fc_bytelog_anchor *src)
+{
+	dst->fc_tid = cpu_to_le32(src->tid);	/* CPU -> little-endian, field by field */
+	dst->fc_head = cpu_to_le64(src->head);
+	dst->fc_tail = cpu_to_le64(src->tail);
+	dst->fc_seq = cpu_to_le64(src->seq);
+	dst->fc_crc = cpu_to_le32(src->crc);	/* only the five named fields are written */
+}
+
+void ext4_fc_bytelog_anchor_from_disk(struct ext4_fc_bytelog_anchor *dst,
+				      const struct ext4_fc_bytelog_entry *src)
+{
+	dst->tid = le32_to_cpu(src->fc_tid);	/* little-endian -> CPU, field by field */
+	dst->head = le64_to_cpu(src->fc_head);
+	dst->tail = le64_to_cpu(src->fc_tail);
+	dst->seq = le64_to_cpu(src->fc_seq);
+	dst->crc = le32_to_cpu(src->fc_crc);	/* only the five named fields are written */
+}
+
 void ext4_fc_bytelog_reset(struct super_block *sb, bool full)
 {
 	struct ext4_fc_bytelog *log = &EXT4_SB(sb)->s_fc_bytelog;
diff --git a/fs/ext4/fast_commit_bytelog.h b/fs/ext4/fast_commit_bytelog.h
index d52754890222..d3e5b734a02e 100644
--- a/fs/ext4/fast_commit_bytelog.h
+++ b/fs/ext4/fast_commit_bytelog.h
@@ -9,6 +9,7 @@
 struct super_block;
 struct journal_s;
 struct ext4_sb_info;
+struct ext4_fc_bytelog_entry;
 
 #define EXT4_FC_BYTELOG_MAGIC			0x4c424346 /* "FCBL" */
 #define EXT4_FC_BYTELOG_VERSION			1
@@ -109,6 +110,10 @@ int ext4_fc_bytelog_append_vec(struct super_block *sb, u16 tag,
 void ext4_fc_bytelog_build_anchor(struct super_block *sb,
 				  struct ext4_fc_bytelog_anchor *anchor,
 				  u32 tid);
+void ext4_fc_bytelog_anchor_to_disk(struct ext4_fc_bytelog_entry *dst,
+				    const struct ext4_fc_bytelog_anchor *src);
+void ext4_fc_bytelog_anchor_from_disk(struct ext4_fc_bytelog_anchor *dst,
+				      const struct ext4_fc_bytelog_entry *src);
 
 static inline bool ext4_fc_bytelog_record_committed(const struct ext4_fc_bytelog_hdr *hdr)
 {
-- 
2.52.0