[PATCH] erofs-utils: mkfs: support hot-file-list for tar and OCI full sources

Mengdie Yan posted 1 patch 1 month, 1 week ago
include/erofs/inode.h   |  2 +
lib/inode.c             | 48 ++++++++++++++++--------
lib/rebuild.c           |  1 +
lib/tar.c               |  2 +
mkfs/main.c             | 26 +++++++++++--
tests/hotfile-layout.sh | 82 +++++++++++++++++++++++++++++++++++++++++
6 files changed, 142 insertions(+), 19 deletions(-)
[PATCH] erofs-utils: mkfs: support hot-file-list for tar and OCI full sources
Posted by Mengdie Yan 1 month, 1 week ago
Extract hot-file marking helpers so non-local import paths can reuse the
existing ranking logic. This extends --hot-file-list support from local
directories to tar full mode and OCI full mode while keeping index, rvsp
and zerofill modes rejected.

For tar/OCI imports, hot ranks are now applied to regular entries,
hardlink aliases and auto-created parent directories, so hot files
remain in the front layout region even when the source stream does not
come from a local rootfs walk.

Add regression tests for tar full mode, hardlink aliases, unsupported
non-full tar modes, and compressed tar full builds so hot layout remains
effective with zstd compression as well.
---
 include/erofs/inode.h   |  2 +
 lib/inode.c             | 48 ++++++++++++++++--------
 lib/rebuild.c           |  1 +
 lib/tar.c               |  2 +
 mkfs/main.c             | 26 +++++++++++--
 tests/hotfile-layout.sh | 82 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 142 insertions(+), 19 deletions(-)

diff --git a/include/erofs/inode.h b/include/erofs/inode.h
index bf089e8..9aef660 100644
--- a/include/erofs/inode.h
+++ b/include/erofs/inode.h
@@ -43,6 +43,8 @@ int erofs_allocate_inode_bh_data(struct erofs_inode *inode, erofs_blk_t nblocks,
 bool erofs_dentry_is_wht(struct erofs_sb_info *sbi, struct erofs_dentry *d);
 int __erofs_fill_inode(struct erofs_importer *im, struct erofs_inode *inode,
 		       struct stat *st, const char *path);
+void erofs_inode_set_hot(struct erofs_inode *inode, const char *path);
+void erofs_inode_update_hot_file(struct erofs_inode *inode, const char *path);
 struct erofs_inode *erofs_new_inode(struct erofs_sb_info *sbi);
 int erofs_importer_load_tree(struct erofs_importer *im, bool rebuild,
 			     bool incremental);
diff --git a/lib/inode.c b/lib/inode.c
index cabe085..1fae8c2 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -1326,6 +1326,37 @@ int __erofs_fill_inode(struct erofs_importer *im, struct erofs_inode *inode,
 	return 0;
 }
 
+/* Recompute @inode's hot state (hot_rank, hotfile, hotdir) from @path. */
+void erofs_inode_set_hot(struct erofs_inode *inode, const char *path)
+{
+	inode->hotdir = false;
+	inode->hotfile = false;
+	inode->hot_rank = EROFS_HOT_RANK_NONE;
+	if (erofs_is_special_identifier(path))
+		return;
+
+	inode->hot_rank = erofs_get_hot_file_rank(path);
+	inode->hotfile = inode->hot_rank != EROFS_HOT_RANK_NONE;
+	if (inode->hotfile || !S_ISDIR(inode->i_mode))
+		return;
+	inode->hot_rank = erofs_get_hot_dir_rank(path);
+	inode->hotdir = inode->hot_rank != EROFS_HOT_RANK_NONE;
+}
+
+/* Give @inode the hot-file rank of @path when it is smaller than its own. */
+void erofs_inode_update_hot_file(struct erofs_inode *inode, const char *path)
+{
+	unsigned int cand;
+
+	if (erofs_is_special_identifier(path))
+		return;
+	cand = erofs_get_hot_file_rank(path);
+	if (cand == EROFS_HOT_RANK_NONE || cand >= inode->hot_rank)
+		return;
+	inode->hotfile = true;
+	inode->hot_rank = cand;
+}
+
 static int erofs_fill_inode(struct erofs_importer *im, struct erofs_inode *inode,
 			    struct stat *st, const char *path)
 {
@@ -1363,15 +1394,7 @@ static int erofs_fill_inode(struct erofs_importer *im, struct erofs_inode *inode
 		if (!inode->i_srcpath)
 			return -ENOMEM;
 	}
-	inode->hot_rank = EROFS_HOT_RANK_NONE;
-	if (!erofs_is_special_identifier(path)) {
-		inode->hot_rank = erofs_get_hot_file_rank(path);
-		inode->hotfile = inode->hot_rank != EROFS_HOT_RANK_NONE;
-		if (!inode->hotfile && S_ISDIR(st->st_mode)) {
-			inode->hot_rank = erofs_get_hot_dir_rank(path);
-			inode->hotdir = inode->hot_rank != EROFS_HOT_RANK_NONE;
-		}
-	}
+	erofs_inode_set_hot(inode, path);
 
 	if (erofs_should_use_inode_extended(im, inode, path)) {
 		if (params->force_inodeversion == EROFS_FORCE_INODE_COMPACT) {
@@ -1445,12 +1468,7 @@ static struct erofs_inode *erofs_iget_from_local(struct erofs_importer *im,
 	if (!S_ISDIR(st.st_mode) && !params->hard_dereference) {
 		inode = erofs_iget(st.st_dev, st.st_ino);
 		if (inode) {
-			u32 rank = erofs_get_hot_file_rank(path);
-
-			if (rank != EROFS_HOT_RANK_NONE && rank < inode->hot_rank) {
-				inode->hot_rank = rank;
-				inode->hotfile = true;
-			}
+			erofs_inode_update_hot_file(inode, path);
 			return inode;
 		}
 	}
diff --git a/lib/rebuild.c b/lib/rebuild.c
index 74bbeda..371a542 100644
--- a/lib/rebuild.c
+++ b/lib/rebuild.c
@@ -61,6 +61,7 @@ static struct erofs_dentry *erofs_rebuild_mkdir(struct erofs_inode *dir,
 	inode->i_mtime_nsec = dir->i_mtime_nsec;
 	inode->dev = dir->dev;
 	inode->i_nlink = 2;
+	erofs_inode_set_hot(inode, inode->i_srcpath);
 
 	d = erofs_d_alloc(dir, s);
 	if (IS_ERR(d)) {
diff --git a/lib/tar.c b/lib/tar.c
index d2dc141..8052249 100644
--- a/lib/tar.c
+++ b/lib/tar.c
@@ -1039,6 +1039,7 @@ out_eot:
 
 		inode = erofs_igrab(d2->inode);
 		++inode->i_nlink;
+		erofs_inode_update_hot_file(inode, eh.path);
 		if (d->type != EROFS_FT_UNKNOWN) {
 			tarerofs_remove_inode(d->inode);
 			erofs_iput(d->inode);
@@ -1096,6 +1097,7 @@ new_inode:
 	ret = __erofs_fill_inode(im, inode, &st, eh.path);
 	if (ret)
 		goto out;
+	erofs_inode_set_hot(inode, eh.path);
 	inode->i_size = st.st_size;
 
 	if (!S_ISDIR(inode->i_mode)) {
diff --git a/mkfs/main.c b/mkfs/main.c
index c154247..7ed7844 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -213,8 +213,8 @@ static void usage(int argc, char **argv)
 		" --gid-offset=#         add offset # to all file gids (# = id offset)\n"
 		" --hard-dereference     dereference hardlinks, add links as separate inodes\n"
 		" --hot-file-list=X      specify newline-separated hot file paths for\n"
-		"                        local directory sources; local rootfs builds\n"
-		"                        resolve symlink aliases\n"
+		"                        local directory, tar full, or OCI full sources;\n"
+		"                        local rootfs builds resolve symlink aliases\n"
 		"                        (e.g. /lib/... -> /usr/lib/...) and prioritize\n"
 		"                        ancestor directories as well\n"
 		" --ignore-mtime         use build time instead of strict per-file modification time\n"
@@ -331,6 +331,24 @@ static enum {
 	EROFS_MKFS_SOURCE_REBUILD,
 } source_mode;
 
+/* Can --hot-file-list be honoured for the current source and import mode? */
+static bool mkfs_hotfile_supported_source(void)
+{
+	bool indexed;
+
+	if (source_mode == EROFS_MKFS_SOURCE_LOCALDIR)
+		return true;
+	if (source_mode == EROFS_MKFS_SOURCE_TAR)
+		indexed = erofstar.index_mode;
+	else if (source_mode == EROFS_MKFS_SOURCE_OCI)
+		indexed = mkfs_oci_tarindex_mode;
+	else
+		return false;
+	/* tar/OCI: rejected for index mode and for rvsp/zerofill imports */
+	return !indexed && dataimport_mode != EROFS_MKFS_DATA_IMPORT_RVSP &&
+	       dataimport_mode != EROFS_MKFS_DATA_IMPORT_ZEROFILL;
+}
+
 static unsigned int rebuild_src_count;
 static LIST_HEAD(rebuild_src_list);
 static u8 fixeduuid[16];
@@ -1557,8 +1575,8 @@ static int mkfs_parse_options_cfg(struct erofs_importer_params *params,
 			return err;
 	}
 
-	if (hotfile_list_path && source_mode != EROFS_MKFS_SOURCE_LOCALDIR) {
-		erofs_err("--hot-file-list is only supported for local directory sources");
+	if (hotfile_list_path && !mkfs_hotfile_supported_source()) {
+		erofs_err("--hot-file-list is only supported for local directories, tar full mode, and OCI full mode");
 		return -EOPNOTSUPP;
 	}
 
diff --git a/tests/hotfile-layout.sh b/tests/hotfile-layout.sh
index 6af3f41..659972a 100755
--- a/tests/hotfile-layout.sh
+++ b/tests/hotfile-layout.sh
@@ -179,6 +179,53 @@ assert_root_dirdata_precedes_hot_file() {
 	fi
 }
 
+assert_tar_hot_file_precedes_cold_file() {
+	img="$1"
+	# $1 is the image to inspect; query extent_start for /a/hot and /b/cold.
+	hot=$(extent_start /a/hot "$img")
+	cold=$(extent_start /b/cold "$img")
+	# Exit 1 unless hot is strictly smaller than cold (-ge also rejects a tie).
+	if [ "$hot" -ge "$cold" ]; then
+		echo "tar hot file was not placed before cold file: hot=$hot cold=$cold" >&2
+		exit 1
+	fi
+}
+
+assert_tar_hot_hardlink_alias_precedes_cold_file() {
+	img="$1"
+	# $1 is the image to inspect; the hot path here is the alias /b/hot-alias.
+	hot=$(extent_start /b/hot-alias "$img")
+	cold=$(extent_start /a/cold "$img")
+	links=$(inode_links /b/hot-alias "$img")
+	# Exit 1 unless the alias starts strictly before /a/cold.
+	if [ "$hot" -ge "$cold" ]; then
+		echo "tar hot hardlink alias was not placed before cold file: hot=$hot cold=$cold" >&2
+		exit 1
+	fi
+	# Exit 1 unless inode_links still reports exactly 2 for the alias.
+	if [ "$links" -ne 2 ]; then
+		echo "tar hot hardlink alias changed link count: links=$links" >&2
+		exit 1
+	fi
+}
+
+assert_hotfile_mode_rejected() {
+	log="$1"
+	shift
+	# $1 is the log file; the remaining arguments are the command, which must fail.
+	if "$@" >"$log" 2>&1; then
+		echo "unsupported hot-file-list mode unexpectedly succeeded" >&2
+		cat "$log" >&2
+		exit 1
+	fi
+	# A failure only counts if the log has the expected message ("--" ends grep options).
+	if ! grep -q -- '--hot-file-list is only supported' "$log"; then
+		echo "unsupported hot-file-list mode failed with an unexpected error" >&2
+		cat "$log" >&2
+		exit 1
+	fi
+}
+
 root="$tmpdir/root"
 mkdir -p "$root/a" "$root/c"
 printf hot > "$root/a/hot"
@@ -319,3 +366,38 @@ printf '/zz/hot\n' > "$tmpdir/hotlist.wide-root"
 "$MKFS" -d9 --hot-file-list="$tmpdir/hotlist.wide-root" \
 	"$tmpdir/img.wide-root.erofs" "$root_wide" >"$tmpdir/mkfs.wide-root.log" 2>&1
 assert_root_dirdata_precedes_hot_file "$tmpdir/img.wide-root.erofs"
+
+root_tar="$tmpdir/root-tar"
+mkdir -p "$root_tar/a" "$root_tar/b"
+dd if=/dev/zero bs=4096 count=64 of="$root_tar/b/cold" status=none
+dd if=/dev/zero bs=4096 count=64 of="$root_tar/a/hot" status=none
+(cd "$root_tar" && tar cf "$tmpdir/root.tar" b/cold a/hot)	# b/cold is archived before a/hot
+printf '/a/hot\n' > "$tmpdir/hotlist.tar"	# only /a/hot is listed as hot
+"$MKFS" -d9 --tar=f --hot-file-list="$tmpdir/hotlist.tar" \
+	"$tmpdir/img.tar.erofs" "$tmpdir/root.tar" >"$tmpdir/mkfs.tar.log" 2>&1
+assert_tar_hot_file_precedes_cold_file "$tmpdir/img.tar.erofs"
+"$MKFS" -d9 -zzstd,level=9 --workers=1 --tar=f \
+	--hot-file-list="$tmpdir/hotlist.tar" \
+	"$tmpdir/img.tar.zstd.erofs" "$tmpdir/root.tar" >"$tmpdir/mkfs.tar.zstd.log" 2>&1
+assert_tar_hot_file_precedes_cold_file "$tmpdir/img.tar.zstd.erofs"
+assert_hotfile_mode_rejected "$tmpdir/mkfs.tar-index.log" \
+	"$MKFS" --tar=i --hot-file-list="$tmpdir/hotlist.tar" \
+	"$tmpdir/img.tar-index.erofs" "$tmpdir/root.tar"
+assert_hotfile_mode_rejected "$tmpdir/mkfs.tar-zerofill.log" \
+	"$MKFS" --tar=f --clean=0 --hot-file-list="$tmpdir/hotlist.tar" \
+	"$tmpdir/img.tar-zerofill.erofs" "$tmpdir/root.tar"
+# Hardlink case: z/original and b/hot-alias are one file; only the alias is listed as hot.
+root_tar_hardlink="$tmpdir/root-tar-hardlink"
+mkdir -p "$root_tar_hardlink/a" "$root_tar_hardlink/b" "$root_tar_hardlink/z"
+dd if=/dev/zero bs=4096 count=64 of="$root_tar_hardlink/a/cold" status=none
+dd if=/dev/zero bs=4096 count=64 of="$root_tar_hardlink/z/original" status=none
+ln "$root_tar_hardlink/z/original" "$root_tar_hardlink/b/hot-alias"
+(cd "$root_tar_hardlink" && tar cf "$tmpdir/root-hardlink.tar" a/cold z/original b/hot-alias)	# z/original precedes b/hot-alias; presumably the alias becomes the hardlink entry
+printf '/b/hot-alias\n' > "$tmpdir/hotlist.tar-hardlink"
+"$MKFS" -d9 --tar=f --hot-file-list="$tmpdir/hotlist.tar-hardlink" \
+	"$tmpdir/img.tar-hardlink.erofs" "$tmpdir/root-hardlink.tar" >"$tmpdir/mkfs.tar-hardlink.log" 2>&1
+assert_tar_hot_hardlink_alias_precedes_cold_file "$tmpdir/img.tar-hardlink.erofs"
+"$MKFS" -d9 -zzstd,level=9 --workers=1 --tar=f \
+	--hot-file-list="$tmpdir/hotlist.tar-hardlink" \
+	"$tmpdir/img.tar-hardlink.zstd.erofs" "$tmpdir/root-hardlink.tar" >"$tmpdir/mkfs.tar-hardlink.zstd.log" 2>&1
+assert_tar_hot_hardlink_alias_precedes_cold_file "$tmpdir/img.tar-hardlink.zstd.erofs"
-- 
2.43.7